def submit_product(ds, met):
    uid = ds['id']
    ds_dir = os.path.join(os.getcwd(), uid)
    ingest(uid, './datasets.json', app.conf.GRQ_UPDATE_URL,
           app.conf.DATASET_PROCESSED_QUEUE, ds_dir, None)
    if os.path.exists(uid):
        shutil.rmtree(uid)
def ingest_acq_dataset(ds, met, ds_cfg, browse=False):
    """Create acquisition dataset and ingest."""
    tmp_dir = tempfile.mkdtemp()
    id, ds_dir = create_acq_dataset(ds, met, tmp_dir, browse)
    ingest(id, ds_cfg, app.conf.GRQ_UPDATE_URL,
           app.conf.DATASET_PROCESSED_QUEUE, ds_dir, None)
    shutil.rmtree(tmp_dir)
def main(): """Main.""" # read in context context_file = os.path.abspath("_context.json") if not os.path.exists(context_file): raise (RuntimeError("Context file doesn't exist.")) with open(context_file) as f: ctx = json.load(f) # resolve acquisition id from slc id slc_id = ctx['slc_id'] slc_version = ctx['slc_version'] acq_version = ctx['acquisition_version'] acq_id = resolve_acq(slc_id, acq_version) logger.info("acq_id: {}".format(acq_id)) # pull all acq-list datasets with acquisition id in either master or slave list ifgcfg_version = ctx['ifgcfg_version'] acqlist_version = ctx['acqlist_version'] acqlists = get_acqlists_by_acqid(acq_id, acqlist_version) logger.info("Found {} matching acq-list datasets".format(len(acqlists))) for acqlist in acqlists: logger.info(json.dumps(acqlist, indent=2)) acq_info = {} for acq in acqlist['metadata']['master_scenes']: acq_info[acq] = get_acq_object(acq, "master") for acq in acqlist['metadata']['slave_scenes']: acq_info[acq] = get_acq_object(acq, "slave") if all_slcs_exist(acq_info.keys(), acq_version, slc_version): prod_dir = publish_data(acq_info, acqlist['metadata']['project'], acqlist['metadata']['job_priority'], acqlist['metadata']['dem_type'], acqlist['metadata']['track_number'], acqlist['metadata']['starttime'], acqlist['metadata']['endtime'], acqlist['metadata']['master_scenes'], acqlist['metadata']['slave_scenes'], acqlist['metadata']['orbitNumber'], acqlist['metadata']['direction'], acqlist['metadata']['platform'], acqlist['metadata']['union_geojson'], acqlist['metadata']['bbox']) logger.info("Created ifg-cfg {} for acq-list {}.".format( prod_dir, acqlist['id'])) if ifgcfg_exists(prod_dir, ifgcfg_version): logger.info("Not ingesting ifg-cfg {}. Already exists.".format( prod_dir)) else: ingest(prod_dir, 'datasets.json', app.conf.GRQ_UPDATE_URL, app.conf.DATASET_PROCESSED_QUEUE, os.path.abspath(prod_dir), None) logger.info("Ingesting ifg-cfg {}.".format(prod_dir)) shutil.rmtree(prod_dir) else: logger.info("Not creating ifg-cfg for acq-list {}.".format( acqlist['id']))
def submit_product(ds, met):
    uid = ds['label']
    ds_dir = os.path.join(os.getcwd(), uid)
    try:
        ingest(uid, './datasets.json', app.conf.GRQ_UPDATE_URL,
               app.conf.DATASET_PROCESSED_QUEUE, ds_dir, None)
        if os.path.exists(uid):
            shutil.rmtree(uid)
    except Exception:
        print('failed on submission of {0}'.format(uid))
def ingest_acq_dataset(starttime, endtime, ds_cfg="/home/ops/verdi/etc/datasets.json"):
    """
    Ingest acquisition dataset
    :param starttime:
    :param endtime:
    :param ds_cfg: path to datasets.json

    TO DO: (somewhat done)
        Change the ingestion mechanism to be similar to the Scihub acquisition ingest:
        create a tmp dir, make the dataset dir inside it, and delete the tmp dir
        after ingestion.
    Background: tried to do that, but when searching for dir names starting with
        'acquisition-' none were found in the tempdir, so nothing was being ingested.
        I reverted changes from commits:
            bd2c26eb2c8ec66e7ec4785b86ce77a65b4394ff
            e82f94a6103f38db5b07f1cd1a2e5766b3be0d00
            9d6e17c121b024e8f64eafbe86bba0d82df14202
    :return:
    """
    existing = get_existing_acqs(starttime, endtime)

    # for every folder starting with `acquisition-`, call ingest
    acq_dirs = filter(lambda x: x.startswith('acquisition-'), os.listdir('.'))

    total_ingested = 0
    total_ingest_failed = 0
    total_existing = 0
    failed_publish = []

    for dir in acq_dirs:
        if os.path.isdir(dir):
            acq_id = dir
            abspath_dir = os.path.abspath(acq_id)
            if dir not in existing:
                try:
                    ingest(acq_id, ds_cfg, app.conf.GRQ_UPDATE_URL,
                           app.conf.DATASET_PROCESSED_QUEUE, abspath_dir, None)
                    LOGGER.info("Successfully ingested dataset {}".format(acq_id))
                    shutil.rmtree(acq_id)
                    total_ingested += 1
                except Exception as e:
                    LOGGER.error("Failed to ingest dataset {}".format(acq_id))
                    LOGGER.error("Exception: {}".format(e))
                    failed_publish.append(acq_id)
                    total_ingest_failed += 1
            else:
                LOGGER.info("acquisition found in existing, will delete directory: %s" % acq_id)
                shutil.rmtree(acq_id)
                total_existing += 1

    LOGGER.info('#' * 100)
    LOGGER.info('total ingested: %i' % total_ingested)
    LOGGER.info('total existing: %i' % total_existing)
    LOGGER.info('total ingest failed: %i' % total_ingest_failed)
    LOGGER.info('list of failed ingests: {}'.format(failed_publish))
    LOGGER.info('#' * 100)
    return
def ingest_acq_dataset(ds, met, ds_cfg, browse=False):
    """Create acquisition dataset and ingest."""
    tmp_dir = tempfile.mkdtemp()
    id, ds_dir = create_acq_dataset(ds, met, tmp_dir, browse)
    try:
        ingest(id, ds_cfg, app.conf.GRQ_UPDATE_URL,
               app.conf.DATASET_PROCESSED_QUEUE, ds_dir, None)
        shutil.rmtree(tmp_dir)
        return True
    except Exception as e:
        logging.info("Ingest Exception: {}".format(e))
        return False
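For context, a minimal usage sketch of the helper above. It assumes a configured HySDS/verdi worker environment; the `ds` and `met` payloads and the `datasets.json` path are hypothetical placeholders (the real payload shape is whatever `create_acq_dataset` expects).

# Minimal sketch only; requires a running HySDS environment.
# The payload shapes below are assumptions, not the real acquisition schema.
example_ds = {"id": "acquisition-EXAMPLE", "version": "v2.0"}       # hypothetical
example_met = {"short_name": "EXAMPLE", "platform": "Sentinel-1A"}  # hypothetical

if ingest_acq_dataset(example_ds, example_met,
                      "/home/ops/verdi/etc/datasets.json", browse=False):
    print("acquisition ingested")
else:
    print("ingest failed; see log for the exception")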
def ingest_acq_dataset(starttime, endtime, ds_cfg="/home/ops/verdi/etc/datasets.json"):
    """Ingest acquisition dataset."""
    existing = get_existing_acqs(starttime, endtime)
    failed_publish = []  # track dataset ids that fail to ingest
    for dir in os.listdir('.'):
        if os.path.isdir(dir):
            id = dir
            if id.startswith("acquisition-"):
                if id.replace("-asf", "-esa_scihub") not in existing:
                    try:
                        ingest(id, ds_cfg, app.conf.GRQ_UPDATE_URL,
                               app.conf.DATASET_PROCESSED_QUEUE, dir, None)
                        shutil.rmtree(id)
                    except Exception as e:
                        print("Failed to ingest dataset {}".format(id))
                        failed_publish.append(id)
    return
def ingest_product(ds, met):
    '''publish a product directly'''
    uid = ds['label']
    shortname = met.get('short_name', '*')
    save_product_met(uid, ds, met)
    ds_dir = os.path.join(os.getcwd(), uid)
    if exists(uid, shortname):
        print('Product already exists with uid: {}. Passing on publish...'.format(uid))
        return
    print('Product with uid: {} does not exist. Publishing...'.format(uid))
    try:
        ingest(uid, './datasets.json', app.conf.GRQ_UPDATE_URL,
               app.conf.DATASET_PROCESSED_QUEUE, ds_dir, None)
        if os.path.exists(uid):
            shutil.rmtree(uid)
    except Exception:
        raise Exception('failed on submission of {0}'.format(uid))
from __future__ import absolute_import

from future import standard_library
standard_library.install_aliases()

import os
import sys
import argparse
import logging

import hysds.orchestrator
from hysds.celery import app
from hysds.dataset_ingest import ingest

logging.basicConfig(level=logging.INFO)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Ingest dataset into HySDS.")
    parser.add_argument('ds_dir', help="dataset directory")
    parser.add_argument('datasets_cfg', help="datasets config JSON")
    parser.add_argument('-d', '--dry-run', action='store_true',
                        help="Don't upload dataset or ingest into GRQ")
    parser.add_argument('-f', '--force', action='store_true',
                        help="Force publish of dataset even if it clobbers")
    args = parser.parse_args()
    ingest(os.path.basename(os.path.normpath(args.ds_dir)), args.datasets_cfg,
           app.conf.GRQ_UPDATE_URL, app.conf.DATASET_PROCESSED_QUEUE,
           os.path.abspath(args.ds_dir), None, args.dry_run, args.force)
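The same ingest call can be made programmatically rather than via the CLI. Below is a minimal sketch that mirrors the wiring of the script above; the dataset directory and datasets.json paths are example values (assumptions), and a configured HySDS/celery environment is still required.

import os

from hysds.celery import app
from hysds.dataset_ingest import ingest

# Example values only (assumed paths); the directory basename doubles as the dataset id.
ds_dir = "/data/work/acquisition-EXAMPLE"
datasets_cfg = "/home/ops/verdi/etc/datasets.json"

ingest(os.path.basename(os.path.normpath(ds_dir)), datasets_cfg,
       app.conf.GRQ_UPDATE_URL, app.conf.DATASET_PROCESSED_QUEUE,
       os.path.abspath(ds_dir), None)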