def update_custom(facets, datadir, dataset_ids=None, debug=False):
    print_message("Generating custom facet mapfile", 'ok')
    if not dataset_ids:
        dataset_ids = []
        for path in datadir:
            dataset_ids.extend(collect_dataset_ids(path))

    print_message("Sending custom facets to the ESGF node", 'ok')
    cert_path = Path(os.environ['HOME'] + '/.globus/certificate-file')
    if not cert_path.exists():
        raise ValueError(
            f"The globus certificate does not exist where it is expected: {str(cert_path.resolve())}")
    cert_path = str(cert_path.resolve())

    url = "https://esgf-node.llnl.gov/esg-search/ws/updateById"
    for dataset in tqdm(dataset_ids):
        for facet in facets:
            # split each "key=value" facet string on the first "="
            idx = facet.index('=')
            key = facet[:idx]
            val = facet[idx + 1:]
            obj = {
                "id": dataset + '|esgf-data2.llnl.gov',
                "action": "set",
                "field": key,
                "value": val,
                "core": "datasets"
            }
            res = requests.get(url, data=obj, verify=False, cert=cert_path)
            if res.status_code != 200:
                print(f"Error sending request {obj}, got response {res}")
    return 0
def publish(mapsin, mapsout, mapserr, ini, loop, sproket='sproket', cred_file=None, debug=False):
    if cred_file:
        if not os.path.exists(cred_file):
            raise ValueError('The given credential file does not exist')
        with open(cred_file, 'r') as ip:
            creds = json.load(ip)
        try:
            username = creds['username']
        except KeyError:
            raise ValueError("Missing username from credential file")
        try:
            password = creds['password']
        except KeyError:
            raise ValueError("Missing password from credential file")
    else:
        username = None
        password = None

    if loop:
        print_message("Starting publisher loop", 'ok')
    else:
        print_message("Starting one-off publisher", 'ok')

    while True:
        mapfiles = [x for x in os.listdir(mapsin) if x.endswith('.map')]
        if mapfiles:
            publish_maps(mapfiles, ini, mapsin, mapsout, mapserr,
                         username, password, debug=debug, sproket=sproket)
        if not loop:
            break
        sleep(30)
    return 0
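# A minimal sketch of the credential file that publish() above accepts: a JSON
# document with "username" and "password" keys. The path and values below are
# hypothetical examples, not project defaults.
#
#     {"username": "esgf_user", "password": "changeme"}
#
#     publish(mapsin='/maps/incoming', mapsout='/maps/published',
#             mapserr='/maps/errors', ini='/esg/config/esgcet',
#             loop=False, cred_file='/home/publisher/creds.json')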
def publish(mapsin, mapsout, mapserr, loop, logpath, sproket='sproket', no_custom=False, debug=False):
    if loop:
        print_message("Starting publisher loop", 'ok')
    else:
        print_message("Starting one-off publisher", 'ok')

    while True:
        mapfiles = [x for x in os.listdir(mapsin) if x.endswith('.map')]
        if mapfiles:
            publish_maps(mapfiles, mapsin, mapsout, mapserr, logpath,
                         debug=debug, no_custom=no_custom, sproket=sproket)
        if not loop:
            break
        sleep(30)
    return 0
def generate_custom(facets, outpath='./custom_facets.map', mapdir=None, datadir=None, debug=False):
    for facet in facets:
        if '=' not in facet:
            raise ValueError(
                'Facets must be in the form of facet_name=facet_value, {} does not have an "="'.format(facet))
    facet_str = " | ".join(facets)
    output = []

    if mapdir:
        maplist = [
            os.path.join(mapdir, f) for f in os.listdir(mapdir)
            if os.path.isfile(os.path.join(mapdir, f))
        ]
        if debug:
            print_message("mapfiles:", 'info')
            for item in maplist:
                print_message('\t' + item, 'info')
        for m in maplist:
            with open(m, "r") as amaplines:
                # the dataset id is the first token of the first line,
                # with the trailing "#<version>" stripped off
                aline = amaplines.readline()
                dataset_id = aline.split(' ')[0]
                hash_index = dataset_id.find('#')
                dataset_id = dataset_id[:hash_index]
                output.append(f"{dataset_id} | {facet_str}\n")
        if 'CMIP6' in output[0].split('|')[0]:
            project = 'cmip6'
        else:
            project = 'e3sm'
    else:
        if not datadir:
            raise ValueError(
                "If no mapfile directory is given, a datadir must be used")
        if isinstance(datadir, str):
            datadir = [datadir]
        for p in datadir:
            dataset_ids, project = collect_dataset_ids(p)
            for dataset in dataset_ids:
                output.append(f"{dataset} | {facet_str}\n")

    with open(outpath, 'w') as outfile:
        for line in output:
            if debug:
                print_message(line, 'info')
            outfile.write(line)
    return project
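# A hedged usage sketch for generate_custom(). "Campaign" matches the custom
# E3SM facet name used elsewhere in this codebase; the facet value and the
# mapfile directory are placeholders.
#
#     project = generate_custom(
#         facets=['Campaign=DECK'],
#         outpath='./custom_facets.map',
#         mapdir='/path/to/mapfiles')
#
# Each line written to outpath takes the form "<dataset_id> | Campaign=DECK".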
def publish_maps(mapfiles, ini, mapsin, mapsout, mapserr, username=None, password=None, sproket='sproket', debug=False):
    for m in mapfiles:
        if debug:
            print_message(f'Starting mapfile: {m}', 'info')

        if not m.endswith('.map'):
            msg = f"Unrecognized file type, this does not appear to be an ESGF mapfile. Moving {m} to the error directory"
            print_message(msg)
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
            continue

        if check_ds_exists(m[:-4], debug=debug, sproket=sproket):
            msg = f"Dataset {m[:-4]} already exists"
            print_message(msg, 'err')
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
            continue

        if m.startswith('CMIP6'):
            project = 'cmip6'
        elif m.startswith('E3SM'):
            project = 'e3sm'
        else:
            raise ValueError(
                "Unrecognized project name for mapfile: {}".format(m))

        if debug:
            print_message("Running myproxy-logon with stored credentials", 'info')

        # fetch a myproxy certificate with the stored credentials
        script = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
echo {password} | myproxyclient logon -S -s esgf-node.llnl.gov -l {username} -t 72 -o ~/.globus/certificate-file"""

        tempfile = "login.sh"
        if os.path.exists(tempfile):
            os.remove(tempfile)
        with open(tempfile, 'w') as fp:
            fp.write(script)
        st = os.stat(tempfile)
        os.chmod(tempfile, st.st_mode | stat.S_IEXEC)

        try:
            check_call('./' + tempfile)
        except CalledProcessError as error:
            print_message("Error while creating myproxy-logon certificate")
            return error.returncode
        os.remove(tempfile)

        map_path = os.path.join(mapsin, m)

        # scan and publish to the node database, generate the THREDDS catalog,
        # reinitialize THREDDS, then publish to the index
        script = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
esgpublish -i {ini} --project {project} --map {map_path} --no-thredds-reinit --commit-every 100
if [ $? -ne 0 ]; then exit $?; fi
esgpublish -i {ini} --project {project} --map {map_path} --service fileservice --noscan --thredds --no-thredds-reinit
if [ $? -ne 0 ]; then exit $?; fi
esgpublish --project {project} --thredds-reinit
esgpublish -i {ini} --project {project} --map {map_path} --service fileservice --noscan --publish
if [ $? -ne 0 ]; then exit $?; fi
"""
        tempfile = "pub_script.sh"
        if os.path.exists(tempfile):
            os.remove(tempfile)
        with open(tempfile, 'w') as fp:
            fp.write(script)
        st = os.stat(tempfile)
        os.chmod(tempfile, st.st_mode | stat.S_IEXEC)

        if debug:
            print_message(f'Running publication script: {tempfile}', 'info')
            print_message(script, 'info')

        try:
            start = datetime.now()
            check_call('./' + tempfile)
            end = datetime.now()
        except CalledProcessError:
            print_message(
                f"Error in publication, moving {m} to {mapserr}", "error")
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
        else:
            print_message(
                f"Publication success, runtime: {end - start}", "info")
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapsout, m))
def update_custom(facets, outpath='./custom_facets.map', generate_only=False, mapdir=None, datadir=None, debug=False):
    print_message("Generating custom facet mapfile", 'ok')
    project = generate_custom(
        facets=facets,
        outpath=outpath,
        mapdir=mapdir,
        datadir=datadir,
        debug=debug)
    print_message("Mapfile generation complete", 'ok')
    if generate_only:
        return 0

    print_message("Sending custom facets to the ESGF node", 'ok')
    facet_update_string = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
esgadd_facetvalues --project {project} --map {outpath} --noscan --thredds --service fileservice"""
    if debug:
        print_message(facet_update_string, 'info')

    update_script = 'update_custom.sh'
    with open(update_script, 'w') as op:
        op.write(facet_update_string)
    st = os.stat(update_script)
    os.chmod(update_script, st.st_mode | stat.S_IEXEC)

    proc = Popen(['./' + update_script], shell=True, stdout=PIPE,
                 stderr=PIPE, universal_newlines=True)
    out, err = proc.communicate()
    if debug:
        print_message(out)
        print_message(err)

    # re-harvest any THREDDS catalogs that esgadd_facetvalues rewrote
    for line in err.split('\n'):
        if "Writing THREDDS catalog" in line:
            search_string = "/esg/content/thredds/esgcet/"
            idx = line.index(search_string)
            xml_path = line[idx + len(search_string):]
            cmd = f"""wget --no-check-certificate --ca-certificate ~/.globus/certificate-file --certificate ~/.globus/certificate-file --private-key ~/.globus/certificate-file --verbose --post-data="uri=https://aims3.llnl.gov/thredds/catalog/esgcet/{xml_path}&metadataRepositoryType=THREDDS" https://esgf-node.llnl.gov/esg-search/ws/harvest"""
            print(cmd)
            os.popen(cmd)
    return 0
def publish_maps(mapfiles, mapsin, mapsout, mapserr, logpath, sproket='sproket', no_custom=False, debug=False):
    os.makedirs(logpath, exist_ok=True)
    with TemporaryDirectory() as tmpdir:
        for m in mapfiles:
            if not m.endswith('.map'):
                continue
            print_message(f"Starting publication for {m}", 'ok')

            datasetID = m[:-4]
            project = datasetID.split('.')[0]

            if check_ds_exists(datasetID, debug=debug, sproket=sproket):
                msg = f"Dataset {datasetID} already exists"
                print_message(msg, 'err')
                os.rename(os.path.join(mapsin, m), os.path.join(mapserr, m))
                continue

            project_metadata = None
            if project == 'CMIP6':
                project = 'cmip6'
            elif project == 'E3SM':
                if not no_custom:
                    # look up the custom E3SM facets and stash them in a
                    # temporary JSON file for esgpublish to pick up
                    campaign, driver, period = get_facet_info(datasetID)
                    if campaign and driver and period:
                        project_metadata_path = os.path.join(
                            tmpdir, f'{datasetID}.json')
                        project_metadata = {
                            'Campaign': campaign,
                            'Science Driver': driver,
                            'Period': period
                        }
                        with open(project_metadata_path, 'w') as op:
                            json.dump(project_metadata, op)
            else:
                raise ValueError(
                    "Unrecognized project name for mapfile: {}".format(m))

            map_path = os.path.join(mapsin, m)
            cmd = f"esgpublish --project {project} --map {map_path}".split()
            if project_metadata and not no_custom:
                cmd.extend(['--json', project_metadata_path])
            print_message(f"Running: {' '.join(cmd)}", 'ok')

            log = os.path.join(logpath, f"{datasetID}.log")
            print_message(f"Writing publication log to {log}", 'ok')
            with open(log, 'w') as outstream:
                # stdout and stderr both go to the per-dataset log file
                proc = Popen(cmd, stdout=outstream, stderr=outstream,
                             universal_newlines=True)
                proc.wait()

            if proc.returncode != 0:
                print_message(
                    f"Error in publication, moving {m} to {mapserr}\n", "error")
                os.rename(os.path.join(mapsin, m), os.path.join(mapserr, m))
            else:
                print_message(
                    f"Publication success, moving {m} to {mapsout}\n", "info")
                os.rename(os.path.join(mapsin, m), os.path.join(mapsout, m))
def stage(ARGS):
    debug = ARGS.debug
    overwrite = bool(ARGS.over_write)

    try:
        with open(ARGS.config, 'r') as ip:
            CONFIG = yaml.load(ip, Loader=yaml.SafeLoader)
    except yaml.YAMLError as error:
        print_message("Unable to parse config file, is it valid yaml?")
        print(repr(error))
        return 1

    try:
        BASEOUTPUT = CONFIG['output_path']
        MODEL_VERSION = CONFIG['model_version']
        ATMRES = CONFIG['atmospheric_resolution']
        OCNRES = CONFIG['ocean_resolution']
        DATA_PATHS = CONFIG['data_paths']
        ENSEMBLE = CONFIG['ensemble']
        EXPERIMENT_NAME = CONFIG['experiment']
        GRID = CONFIG.get('non_native_grid')
        START = int(CONFIG['start_year'])
        END = int(CONFIG['end_year'])
    except (KeyError, ValueError) as error:
        print_message('Unable to find values in config file')
        print(repr(error))
        return 1

    print_message('Validating raw data', 'ok')
    if not validate_raw(DATA_PATHS, START, END):
        return 1

    base_path = os.path.join(BASEOUTPUT, MODEL_VERSION)
    resdirname = "{}_atm_{}_ocean".format(ATMRES, OCNRES)
    makedir(os.path.join(base_path, EXPERIMENT_NAME, resdirname))

    transfer_mode = ARGS.transfer_mode
    if transfer_mode == 'move':
        print_message('Moving files', 'ok')
    elif transfer_mode == 'copy':
        print_message('Copying files', 'ok')
    elif transfer_mode == 'link':
        print_message('Linking files', 'ok')

    num_moved, paths = transfer_files(
        outpath=base_path,
        experiment=EXPERIMENT_NAME,
        grid=GRID,
        mode=transfer_mode,
        data_paths=DATA_PATHS,
        ensemble=ENSEMBLE,
        overwrite=overwrite)
    if num_moved == -1:
        return 1

    RUNMAPS = CONFIG.get('mapfiles', False)
    if RUNMAPS not in [True, 'true', 'True', 1, '1']:
        print_message('Not running mapfile generation', 'ok')
        print_message('Publication prep complete', 'ok')
        return 0

    print_message('Starting mapfile generation', 'ok')
    try:
        INIPATH = CONFIG['ini_path']
        MAPOUT = ARGS.mapout
    except (KeyError, AttributeError):
        raise ValueError(
            "Mapfile generation is turned on, but the config is missing the ini_path option")
    NUMWORKERS = CONFIG.get('num_workers', 4)

    event = Event()
    pbar = tqdm(desc="Generating mapfiles", total=num_moved)
    res = -1
    try:
        for path in paths:
            res = mapfile_gen(
                basepath=path,
                inipath=INIPATH,
                outpath=MAPOUT,
                maxprocesses=NUMWORKERS,
                env_name=ARGS.mapfile_env,
                debug=debug,
                event=event,
                pbar=pbar)
        pbar.close()
    except KeyboardInterrupt:
        print_message('Keyboard interrupt caught, exiting')
        event.set()
        return 1
    else:
        if res == 0:
            print_message('Publication prep complete', 'ok')
        else:
            print_message(
                'mapfile generation exited with status: {}'.format(res), 'error')
        return res
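# A minimal sketch of the YAML config consumed by stage(). The keys mirror the
# CONFIG lookups above; all values are hypothetical placeholders.
#
#     output_path: /p/user_pub/staging
#     model_version: 1_0
#     atmospheric_resolution: 1deg
#     ocean_resolution: 60-30km
#     data_paths:
#         atmos: /path/to/raw/atm
#     ensemble: ens1
#     experiment: piControl
#     non_native_grid: 180x360
#     start_year: 1
#     end_year: 100
#     mapfiles: true                  # optional; enables mapfile generation
#     ini_path: /path/to/esgcet/ini   # required when mapfiles is enabled
#     num_workers: 4                  # optional; defaults to 4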