def create_acq_dataset(ds, met, root_ds_dir=".", browse=False):
    """Create acquisition dataset. Return tuple of (dataset ID, dataset dir)."""

    # create dataset dir
    id = "acquisition-{}-esa_scihub".format(met["title"])
    root_ds_dir = os.path.abspath(root_ds_dir)
    ds_dir = os.path.join(root_ds_dir, id)
    if not os.path.isdir(ds_dir):
        os.makedirs(ds_dir, 0o755)

    # append source to met
    met['query_api'] = "opensearch"

    # set IPF version to None
    met['processing_version'] = None

    # dump dataset and met JSON
    ds_file = os.path.join(ds_dir, "%s.dataset.json" % id)
    met_file = os.path.join(ds_dir, "%s.met.json" % id)
    with open(ds_file, 'w') as f:
        json.dump(ds, f, indent=2, sort_keys=True)
    with open(met_file, 'w') as f:
        json.dump(met, f, indent=2, sort_keys=True)

    # create browse?
    if browse:
        browse_jpg = os.path.join(ds_dir, "browse.jpg")
        browse_png = os.path.join(ds_dir, "browse.png")
        browse_small_png = os.path.join(ds_dir, "browse_small.png")
        get(met['icon'], browse_jpg)
        check_call(["convert", browse_jpg, browse_png])
        os.unlink(browse_jpg)
        check_call(
            ["convert", "-resize", "250x250", browse_png, browse_small_png])

    return id, ds_dir
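# --------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): `example_ds`
# and `example_met` are made-up stand-ins for the dataset/metadata documents
# a caller would normally build from an ESA SciHub opensearch result.
example_ds = {
    "version": "v2.0",                                      # hypothetical
    "starttime": "2020-01-01T00:00:00",
    "endtime": "2020-01-01T00:00:25",
}
example_met = {
    "title": "S1A_IW_SLC__1SDV_20200101T000000_EXAMPLE",    # hypothetical
    "icon": "https://example.com/quicklook.jpg",            # hypothetical
}
acq_id, acq_dir = create_acq_dataset(example_ds, example_met,
                                     root_ds_dir="/tmp", browse=False)
print("wrote %s to %s" % (acq_id, acq_dir))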
def create_acq_dataset(ds, met, manifest, root_ds_dir=".", browse=False):
    """Create acquisition dataset. Return tuple of (dataset ID, dataset dir)."""

    # create dataset dir
    id = "acquisition-{}_{}_{}_{}-esa_scihub".format(
        met["platform"],
        get_timestamp_for_filename(met["sensingStart"]),
        met["trackNumber"],
        met["sensoroperationalmode"])
    root_ds_dir = os.path.abspath(root_ds_dir)
    ds_dir = os.path.join(root_ds_dir, id)
    if not os.path.isdir(ds_dir):
        os.makedirs(ds_dir, 0o755)

    # append source to met
    met['query_api'] = "opensearch"

    # append processing version (ipf)
    # ns = get_namespaces(manifest)
    # x = fromstring(manifest)
    # ipf = x.xpath('.//xmlData/safe:processing/safe:facility/safe:software/@version', namespaces=ns)[0]
    # met['processing_version'] = ipf
    met['processing_version'] = None

    # dump dataset and met JSON
    ds_file = os.path.join(ds_dir, "%s.dataset.json" % id)
    met_file = os.path.join(ds_dir, "%s.met.json" % id)
    with open(ds_file, 'w') as f:
        json.dump(ds, f, indent=2, sort_keys=True)
    with open(met_file, 'w') as f:
        json.dump(met, f, indent=2, sort_keys=True)

    # dump manifest
    manifest_file = os.path.join(ds_dir, "manifest.safe")
    with open(manifest_file, 'w') as f:
        f.write(manifest)

    # create browse?
    if browse:
        browse_jpg = os.path.join(ds_dir, "browse.jpg")
        browse_png = os.path.join(ds_dir, "browse.png")
        browse_small_png = os.path.join(ds_dir, "browse_small.png")
        get(met['icon'], browse_jpg)
        check_call(["convert", browse_jpg, browse_png])
        os.unlink(browse_jpg)
        check_call(
            ["convert", "-resize", "250x250", browse_png, browse_small_png])

    return id, ds_dir
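# --------------------------------------------------------------------------
# Sketch of the IPF (processing version) extraction that the commented-out
# lines above hint at. This is an assumption-laden illustration, not the
# original implementation: it parses the SAFE manifest with lxml and reads
# the safe:software version attribute via the same XPath as the comment.
from lxml.etree import fromstring  # assumed available

def extract_ipf_version(manifest):
    """Return the IPF version string from a manifest.safe document, or None."""
    data = manifest.encode() if isinstance(manifest, str) else manifest
    x = fromstring(data)
    ns = {k: v for k, v in x.nsmap.items() if k}  # drop default namespace
    versions = x.xpath(
        './/xmlData/safe:processing/safe:facility/safe:software/@version',
        namespaces=ns)
    return versions[0] if versions else None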
def crawl(ds_es_url, dataset_version, tag):
    """Crawl for calibration files and create datasets if they don't exist in ES."""
    active_ids = []
    for id, url in crawl_cals(dataset_version):
        #logger.info("%s: %s" % (id, url))
        active_ids.append(id)
        total, found_id = check_cal(ds_es_url, "grq", id)
        if total > 0:
            logger.info("Found %s." % id)
        else:
            logger.info("Missing %s. Creating dataset." % id)
            cal_tar_file = os.path.basename(url)
            get(url, cal_tar_file)
            safe_tar_file = cal_tar_file.replace('.TGZ', '')
            shutil.move(cal_tar_file, safe_tar_file)
            create_cal_ds(safe_tar_file, ds_es_url, dataset_version)
    purge_active_cal_ds(ds_es_url, dataset_version)
    create_active_cal_ds(active_ids, dataset_version)
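# --------------------------------------------------------------------------
# Hypothetical invocation sketch (all values are made up): sync any
# calibration files missing from the GRQ Elasticsearch index.
if __name__ == "__main__":
    crawl("http://localhost:9200",     # hypothetical GRQ ES URL
          dataset_version="v1.1",      # hypothetical dataset version
          tag="dev")                   # hypothetical tag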
# NOTE: this fragment runs inside an enclosing loop over product hits;
# doc, prod_id, work_dir, and prod_url (initialized to None) are set by the
# surrounding code, and `continue` skips to the next product.
for url in doc['urls']:
    if url.startswith('s3://'):
        prod_url = url
        break
if prod_url is None:
    print("Failed to find s3 url for prod %s" % prod_id)
    continue

# set up a clean work dir with a merged/ subdirectory
if os.path.exists(work_dir):
    shutil.rmtree(work_dir)
os.makedirs(work_dir, 0o755)
os.chdir(work_dir)
merged_dir = "merged"
if os.path.exists(merged_dir):
    shutil.rmtree(merged_dir)
os.makedirs(merged_dir, 0o755)

# download the unwrapped interferogram product and its sidecar files
unw_prod_file = "filt_topophase.unw.geo"
unw_prod_url = "%s/merged/%s" % (prod_url, unw_prod_file)
get(unw_prod_url, "merged/{}".format(unw_prod_file))
for i in ('hdr', 'vrt', 'xml'):
    get("{}.{}".format(unw_prod_url, i),
        "merged/{}.{}".format(unw_prod_file, i))
#print json.dumps(doc, indent=2)

# clean out tiles if exists
parsed_url = urlparse(prod_url)
tiles_url = "s3://{}/tiles".format(parsed_url.path[1:])
cmd = "aws s3 rm --recursive {}"
check_call(cmd.format(tiles_url), shell=True)

# create displacement tile layer
vrt_prod_file = "{}.vrt".format(unw_prod_file)
dis_layer = "displacement"
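# --------------------------------------------------------------------------
# The fragment above assumes a get(url, dest) download helper. A minimal
# stand-in (an assumption, not the project's actual implementation) could
# shell out to the aws CLI for s3:// URLs and stream HTTP downloads with
# requests otherwise.
import requests
from subprocess import check_call

def get(url, dest):
    """Download url to dest (sketch; no retries or checksumming)."""
    if url.startswith("s3://"):
        check_call(["aws", "s3", "cp", url, dest])
    else:
        r = requests.get(url, stream=True, timeout=60)
        r.raise_for_status()
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)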
def export(args):
    """Export HySDS package."""
    cont_id = args.id  # container id

    # query for container
    cont = mozart_es.get_by_id(index=CONTAINERS_INDEX, id=cont_id, ignore=404)
    if cont['found'] is False:
        logger.error("SDS package id {} not found.".format(cont_id))
        return 1
    cont_info = cont['_source']
    logger.debug("cont_info: %s" % json.dumps(cont_info, indent=2))

    # set export directory
    outdir = normpath(args.outdir)
    export_name = "{}.sdspkg".format(cont_id.replace(':', '-'))
    export_dir = os.path.join(outdir, export_name)
    logger.debug("export_dir: %s" % export_dir)

    # if directory exists, stop
    if os.path.exists(export_dir):
        logger.error(
            "SDS package export directory {} exists. Not continuing.".format(
                export_dir))
        return 1

    validate_dir(export_dir)  # create export directory

    # download container
    get(cont_info['url'], export_dir)
    cont_info['url'] = os.path.basename(cont_info['url'])

    # query job specs
    query = {
        "query": {
            "term": {
                "container.keyword": cont_id
            }
        }
    }
    job_specs = mozart_es.query(index=JOB_SPECS_INDEX, body=query)
    job_specs = [job_spec['_source'] for job_spec in job_specs]
    logger.debug("job_specs: %s" % json.dumps(job_specs, indent=2))

    # backwards-compatible query
    if len(job_specs) == 0:
        logger.debug("Got no job_specs. Checking deprecated mappings:")
        query = {
            "query": {
                "query_string": {
                    "query": "container:\"{}\"".format(cont_id)
                }
            }
        }
        job_specs = mozart_es.query(index=JOB_SPECS_INDEX, body=query)
        job_specs = [job_spec['_source'] for job_spec in job_specs]
        logger.debug("job_specs: %s" % json.dumps(job_specs, indent=2))

    # pull hysds_ios for each job_spec and download any dependency images
    hysds_ios = []
    dep_images = {}
    for job_spec in job_specs:
        # download dependency images
        for d in job_spec.get('dependency_images', []):
            if d['container_image_name'] in dep_images:
                d['container_image_url'] = dep_images[d['container_image_name']]
            else:
                # download container
                get(d['container_image_url'], export_dir)
                d['container_image_url'] = os.path.basename(
                    d['container_image_url'])
                dep_images[d['container_image_name']] = d['container_image_url']

        # collect hysds_ios from mozart
        query = {
            "query": {
                "term": {
                    "job-specification.keyword": job_spec['id']
                }
            }
        }
        mozart_hysds_ios = mozart_es.query(index=HYSDS_IOS_MOZART_INDEX, body=query)
        mozart_hysds_ios = [hysds_io['_source'] for hysds_io in mozart_hysds_ios]
        logger.debug("Found %d hysds_ios on mozart for %s." %
                     (len(mozart_hysds_ios), job_spec['id']))

        # backwards-compatible query
        if len(mozart_hysds_ios) == 0:
            logger.debug(
                "Got no hysds_ios from mozart. Checking deprecated mappings:")
            query = {
                "query": {
                    "query_string": {
                        "query": "job-specification:\"{}\"".format(job_spec['id'])
                    }
                }
            }
            mozart_hysds_ios = mozart_es.query(index=HYSDS_IOS_MOZART_INDEX, body=query)
            mozart_hysds_ios = [hysds_io['_source'] for hysds_io in mozart_hysds_ios]
            logger.debug("Found %d hysds_ios on mozart for %s." %
                         (len(mozart_hysds_ios), job_spec['id']))
        hysds_ios.extend(mozart_hysds_ios)

        # collect hysds_ios from grq
        query = {
            "query": {
                "term": {
                    "job-specification.keyword": job_spec['id']
                }
            }
        }
        grq_hysds_ios = mozart_es.query(index=HYSDS_IOS_GRQ_INDEX, body=query)
        grq_hysds_ios = [hysds_io['_source'] for hysds_io in grq_hysds_ios]
        logger.debug("Found %d hysds_ios on grq for %s." %
                     (len(grq_hysds_ios), job_spec['id']))

        # backwards-compatible query
        if len(grq_hysds_ios) == 0:
            logger.debug(
                "Got no hysds_ios from grq. Checking deprecated mappings:")
            query = {
                "query": {
                    "query_string": {
                        "query": "job-specification:\"{}\"".format(job_spec['id'])
                    }
                }
            }
            grq_hysds_ios = mozart_es.query(index=HYSDS_IOS_GRQ_INDEX, body=query)
            grq_hysds_ios = [hysds_io['_source'] for hysds_io in grq_hysds_ios]
            logger.debug("Found %d hysds_ios on grq for %s." %
                         (len(grq_hysds_ios), job_spec['id']))
        hysds_ios.extend(grq_hysds_ios)

    logger.debug("Found %d hysds_ios total." % (len(hysds_ios)))

    # drop allowed accounts unless they were requested for export
    if not args.accounts:
        for hysds_io in hysds_ios:
            if 'allowed_accounts' in hysds_io:
                del hysds_io['allowed_accounts']

    # dump manifest JSON
    manifest = {
        "containers": cont_info,
        "job_specs": job_specs,
        "hysds_ios": hysds_ios,
    }
    manifest_file = os.path.join(export_dir, 'manifest.json')
    with open(manifest_file, 'w') as f:
        json.dump(manifest, f, indent=2, sort_keys=True)

    # tar up hysds package
    tar_file = os.path.join(outdir, "{}.tar".format(export_name))
    with tarfile.open(tar_file, "w") as tar:
        tar.add(export_dir, arcname=os.path.relpath(export_dir, outdir))

    shutil.rmtree(export_dir)  # remove package dir
def export(args):
    """Export HySDS package."""

    # get user's SDS conf settings
    conf = SettingsConf()

    # container id
    cont_id = args.id

    # query for container
    mozart_es_url = "http://{}:9200".format(conf.get('MOZART_ES_PVT_IP'))
    grq_es_url = "http://{}:9200".format(conf.get('GRQ_ES_PVT_IP'))
    hits = run_query(mozart_es_url, "containers",
                     {"query": {"term": {"_id": cont_id}}})
    if len(hits) == 0:
        logger.error("SDS package id {} not found.".format(cont_id))
        return 1
    cont_info = hits[0]['_source']
    logger.debug("cont_info: {}".format(json.dumps(cont_info, indent=2)))

    # set export directory
    outdir = normpath(args.outdir)
    export_name = "{}.sdspkg".format(cont_id)
    export_dir = os.path.join(outdir, export_name)
    logger.debug("export_dir: {}".format(export_dir))

    # if directory exists, stop
    if os.path.exists(export_dir):
        logger.error(
            "SDS package export directory {} exists. Not continuing.".format(
                export_dir))
        return 1

    # create export directory
    validate_dir(export_dir)

    # download container
    get(cont_info['url'], export_dir)
    cont_info['url'] = os.path.basename(cont_info['url'])

    # query job specs
    job_specs = [
        i['_source'] for i in run_query(
            mozart_es_url, "job_specs",
            {"query": {"term": {"container.raw": cont_id}}})
    ]
    logger.debug("job_specs: {}".format(json.dumps(job_specs, indent=2)))

    # pull hysds_ios for each job_spec and download any dependency images
    hysds_ios = []
    dep_images = {}
    for job_spec in job_specs:
        # download dependency images
        for d in job_spec.get('dependency_images', []):
            if d['container_image_name'] in dep_images:
                d['container_image_url'] = dep_images[d['container_image_name']]
            else:
                # download container
                get(d['container_image_url'], export_dir)
                d['container_image_url'] = os.path.basename(
                    d['container_image_url'])
                dep_images[d['container_image_name']] = d['container_image_url']

        # collect hysds_ios from mozart
        mozart_hysds_ios = [
            i['_source'] for i in run_query(
                mozart_es_url, "hysds_ios",
                {"query": {"term": {"job-specification.raw": job_spec['id']}}})
        ]
        logger.debug("Found {} hysds_ios on mozart for {}.".format(
            len(mozart_hysds_ios), job_spec['id']))
        hysds_ios.extend(mozart_hysds_ios)

        # collect hysds_ios from grq
        grq_hysds_ios = [
            i['_source'] for i in run_query(
                grq_es_url, "hysds_ios",
                {"query": {"term": {"job-specification.raw": job_spec['id']}}})
        ]
        logger.debug("Found {} hysds_ios on grq for {}.".format(
            len(grq_hysds_ios), job_spec['id']))
        hysds_ios.extend(grq_hysds_ios)

    logger.debug("Found {} hysds_ios total.".format(len(hysds_ios)))

    # clean out allowed accounts
    for hysds_io in hysds_ios:
        if 'allowed_accounts' in hysds_io:
            del hysds_io['allowed_accounts']

    # dump manifest JSON
    manifest = {
        "containers": cont_info,
        "job_specs": job_specs,
        "hysds_ios": hysds_ios,
    }
    manifest_file = os.path.join(export_dir, 'manifest.json')
    with open(manifest_file, 'w') as f:
        json.dump(manifest, f, indent=2, sort_keys=True)

    # tar up hysds package
    tar_file = os.path.join(outdir, "{}.tar".format(export_name))
    with tarfile.open(tar_file, "w") as tar:
        tar.add(export_dir, arcname=os.path.relpath(export_dir, outdir))

    # remove package dir
    shutil.rmtree(export_dir)
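# --------------------------------------------------------------------------
# The older export() above assumes a run_query(es_url, index, query) helper
# that returns all matching hits. A minimal sketch of that contract using the
# Elasticsearch scroll API over plain HTTP (an assumption about the helper's
# behavior, not its actual implementation):
import requests

def run_query(es_url, index, query, scroll="2m", size=100):
    """Return the full list of hits for query against es_url/index."""
    url = "{}/{}/_search?scroll={}&size={}".format(es_url, index, scroll, size)
    r = requests.post(url, json=query)
    r.raise_for_status()
    resp = r.json()
    scroll_id = resp["_scroll_id"]
    hits = resp["hits"]["hits"]
    while True:
        r = requests.post("{}/_search/scroll".format(es_url),
                          json={"scroll": scroll, "scroll_id": scroll_id})
        r.raise_for_status()
        resp = r.json()
        if not resp["hits"]["hits"]:
            break
        hits.extend(resp["hits"]["hits"])
        scroll_id = resp["_scroll_id"]
    return hits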