예제 #1
0
def update_custom(facets, datadir, dataset_ids=None, debug=False):
    """Push custom facet values to the ESGF search ``updateById`` endpoint.

    One request is sent per (dataset, facet) pair. A non-200 response is
    reported with ``print`` and does not stop the remaining updates.

    Args:
        facets: iterable of ``"name=value"`` strings; each one is split on its
            first ``=``.
        datadir: iterable of paths, each handed to ``collect_dataset_ids`` when
            no ``dataset_ids`` are supplied.
        dataset_ids: dataset identifiers to update. When empty or ``None`` the
            identifiers are collected from ``datadir`` instead.
        debug: accepted but not used by this function.

    Returns:
        Always 0.

    Raises:
        ValueError: if ``$HOME/.globus/certificate-file`` does not exist, or a
            facet string contains no ``=``.
        KeyError: if the ``HOME`` environment variable is not set.
    """
    print_message("Generating custom facet mapfile", 'ok')
    if not dataset_ids:
        dataset_ids = []
        for directory in datadir:
            dataset_ids.extend(collect_dataset_ids(directory))

    print_message("Sending custom facets to the ESGF node", 'ok')

    certificate = Path(os.environ['HOME'] + '/.globus/certificate-file')
    if not certificate.exists():
        raise ValueError(
            f"The globus certificate doesnt exist where its expected, {str(certificate.resolve())}"
        )
    certificate = str(certificate.resolve())

    # The endpoint is the same for every request, so it is built only once.
    endpoint = "https://esgf-node.llnl.gov/esg-search/ws/updateById"
    for dataset in tqdm(dataset_ids):
        for facet in facets:
            split_at = facet.index('=')
            payload = {
                "id": dataset + '|esgf-data2.llnl.gov',
                "action": "set",
                "field": facet[:split_at],
                "value": facet[split_at + 1:],
                "core": "datasets"
            }
            # NOTE(review): verify=False turns off TLS certificate verification.
            response = requests.get(endpoint,
                                    data=payload,
                                    verify=False,
                                    cert=certificate)
            if response.status_code != 200:
                print(f"Error sending request {payload}, got response {response}")

    return 0
예제 #2
0
def publish(mapsin, mapsout, mapserr, ini, loop, sproket='sproket', cred_file=None, debug=False):
    """Publish every ``.map`` file found in ``mapsin``, once or in a polling loop.

    Args:
        mapsin: directory scanned for files whose name ends in ``.map``.
        mapsout: forwarded to ``publish_maps``.
        mapserr: forwarded to ``publish_maps``.
        ini: forwarded to ``publish_maps``.
        loop: when truthy, rescan ``mapsin`` every 30 seconds forever;
            otherwise scan once and return.
        sproket: forwarded to ``publish_maps``.
        cred_file: optional path to a JSON file holding ``username`` and
            ``password`` entries. When ``None`` both are passed on as ``None``.
        debug: forwarded to ``publish_maps``.

    Returns:
        Always 0 (only reachable when ``loop`` is falsy).

    Raises:
        ValueError: if ``cred_file`` is given but does not exist, or it lacks
            a ``username`` or ``password`` entry.
    """
    username = None
    password = None

    # Only validate the credential file when one was actually supplied; calling
    # os.path.exists(None) raises TypeError, which made the documented default
    # (cred_file=None) unusable.
    if cred_file is not None:
        if not os.path.exists(cred_file):
            raise ValueError('The given credential file does not exist')

        with open(cred_file, 'r') as ip:
            creds = json.load(ip)

        # TypeError covers JSON documents whose top level is not an object.
        try:
            username = creds['username']
        except (KeyError, TypeError) as error:
            raise ValueError("Missing username from credetial file") from error
        try:
            password = creds['password']
        except (KeyError, TypeError) as error:
            raise ValueError("Missing password from credential file") from error

    if loop:
        print_message("Starting publisher loop", 'ok')
    else:
        print_message("Starting one-off publisher", 'ok')

    while True:
        mapfiles = [x for x in os.listdir(mapsin) if x.endswith('.map')]
        if mapfiles:
            publish_maps(mapfiles, ini, mapsin, mapsout,
                         mapserr, username, password,
                         debug=debug, sproket=sproket)
        if not loop:
            break
        sleep(30)

    return 0
예제 #3
0
def publish(mapsin,
            mapsout,
            mapserr,
            loop,
            logpath,
            sproket='sproket',
            no_custom=False,
            debug=False):
    """Hand the ``.map`` files in ``mapsin`` to ``publish_maps``, once or repeatedly.

    Args:
        mapsin: directory scanned for files whose name ends in ``.map``.
        mapsout: forwarded to ``publish_maps``.
        mapserr: forwarded to ``publish_maps``.
        loop: when truthy, rescan ``mapsin`` every 30 seconds forever;
            otherwise perform a single scan.
        logpath: forwarded to ``publish_maps``.
        sproket: forwarded to ``publish_maps``.
        no_custom: forwarded to ``publish_maps``.
        debug: forwarded to ``publish_maps``.

    Returns:
        Always 0 (only reachable when ``loop`` is falsy).
    """
    banner = "Starting publisher loop" if loop else "Starting one-off publisher"
    print_message(banner, 'ok')

    keep_polling = True
    while keep_polling:
        pending = [
            entry for entry in os.listdir(mapsin) if entry.endswith('.map')
        ]
        if pending:
            publish_maps(pending,
                         mapsin,
                         mapsout,
                         mapserr,
                         logpath,
                         debug=debug,
                         no_custom=no_custom,
                         sproket=sproket)
        if loop:
            # Wait before looking for newly arrived mapfiles.
            sleep(30)
        else:
            keep_polling = False

    return 0
예제 #4
0
def generate_custom(facets,
                    outpath='./custom_facets.map',
                    mapdir=None,
                    datadir=None,
                    debug=False):
    """Write a custom-facet mapfile with one ``dataset | facet | facet`` line per dataset.

    Dataset ids come from the first line of every file in ``mapdir`` when it
    is given, otherwise from ``collect_dataset_ids`` applied to each entry of
    ``datadir``.

    Args:
        facets: list of ``"facet_name=facet_value"`` strings.
        outpath: path of the file to write.
        mapdir: directory of mapfiles. The dataset id is the first
            space-separated field of each file's first line, with any
            ``#...`` suffix removed.
        datadir: a path or list of paths, used only when ``mapdir`` is falsy.
        debug: when truthy, echo the mapfiles found and the lines written via
            ``print_message``.

    Returns:
        ``'cmip6'`` or ``'e3sm'`` in the ``mapdir`` case (decided by whether
        the first output line's dataset id contains ``CMIP6``); in the
        ``datadir`` case, the project returned by ``collect_dataset_ids`` for
        the last path.

    Raises:
        ValueError: if a facet has no ``=``, if neither ``mapdir`` nor
            ``datadir`` is given, or if ``mapdir`` contains no files.
    """
    for facet in facets:
        # str.index raises instead of returning -1, so test membership.
        if '=' not in facet:
            raise ValueError(
                'Facets must be in the form of facet_name=facet_value, {} does not have an "="'
                .format(facet))
    facet_str = " | ".join(facets)

    output = []
    if mapdir:
        maplist = [
            os.path.join(mapdir, f) for f in os.listdir(mapdir)
            if os.path.isfile(os.path.join(mapdir, f))
        ]
        if not maplist:
            # Without any mapfile there is nothing to tag and no project to infer.
            raise ValueError(
                "No mapfiles were found in the mapfile directory {}".format(
                    mapdir))
        if debug:
            print_message("mapfiles:", 'info')
            for item in maplist:
                print_message('\t' + item, 'info')
        for m in maplist:
            with open(m, "r") as amaplines:
                aline = amaplines.readline()
            first_field = aline.split(' ')[0].strip()
            # Drop the "#..." suffix only when present; slicing with the -1
            # returned by find() used to cut off the id's last character.
            dataset_id = first_field.partition('#')[0]
            output.append(f"{dataset_id} | {facet_str}\n")
        if 'CMIP6' in output[0].split('|')[0]:
            project = 'cmip6'
        else:
            project = 'e3sm'
    else:
        if not datadir:
            raise ValueError(
                "If no mapfile directory is given, a datadir must be used")
        if isinstance(datadir, str):
            datadir = [datadir]
        for p in datadir:
            dataset_ids, project = collect_dataset_ids(p)
            for dataset in dataset_ids:
                # Use the loop variable; the old code referenced an undefined name.
                output.append(f"{dataset} | {facet_str}\n")

    with open(outpath, 'w') as outfile:
        for line in output:
            if debug:
                print_message(line, 'info')
            outfile.write(line)

    return project
예제 #5
0
def _write_executable_script(path, contents):
    """Write *contents* to *path*, replacing any existing file, and set its owner-execute bit."""
    if os.path.exists(path):
        os.remove(path)
    with open(path, 'w') as fp:
        fp.write(contents)
    os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)


def publish_maps(mapfiles, ini, mapsin, mapsout, mapserr, username=None, password=None, sproket='spoket', debug=False):
    """Publish each mapfile with ``esgpublish`` and file it under ``mapsout`` or ``mapserr``.

    For every mapfile a ``login.sh`` script is generated and run to obtain a
    myproxy certificate, followed by a ``pub_script.sh`` script that runs the
    ``esgpublish`` steps. Both scripts are created in the current working
    directory.

    Args:
        mapfiles: file names (not paths) located inside ``mapsin``.
        ini: value passed to ``esgpublish -i``.
        mapsin: directory holding the incoming mapfiles.
        mapsout: directory that successfully published mapfiles are moved to.
        mapserr: directory that rejected or failed mapfiles are moved to.
        username: login name for ``myproxyclient logon``.
        password: password piped into ``myproxyclient logon``.
        sproket: forwarded to ``check_ds_exists``.
            NOTE(review): the default ``'spoket'`` looks like a typo of
            ``'sproket'``; it is left untouched because it is part of the
            signature.
        debug: when truthy, print progress details including the generated
            publication script.

    Returns:
        ``None`` after all mapfiles were handled, or the return code of the
        login script as soon as a logon attempt fails.

    Raises:
        ValueError: if a mapfile name starts with neither ``CMIP6`` nor ``E3SM``.
    """
    import shlex  # local import: quoting for values interpolated into shell scripts

    for m in mapfiles:
        if debug:
            print_message(f'Starting mapfile: {m}', 'info')
        if not m.endswith('.map'):
            msg = "Unrecognized file type, this doesnt appear to be an ESGF mapfile. Moving to the err directory {}".format(m)
            print_message(msg)
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
            continue
        if check_ds_exists(m[:-4], debug=debug, sproket=sproket):
            msg = f"Dataset {m[:-4]} already exists"
            print_message(msg, 'err')
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
            continue
        if m.startswith('CMIP6'):
            project = 'cmip6'
        elif m.startswith('E3SM'):
            project = 'e3sm'
        else:
            raise ValueError(
                "Unrecognized project name for mapfile: {}".format(m))

        if debug:
            print_message("Running myproxy-logon with stored credentials", 'info')

        # Quote the credentials so shell metacharacters in them cannot change
        # the command; quote() leaves plain alphanumeric values unchanged.
        script = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
echo {shlex.quote(str(password))} | myproxyclient logon -S -s esgf-node.llnl.gov -l {shlex.quote(str(username))} -t 72 -o ~/.globus/certificate-file"""

        login_script = "login.sh"
        _write_executable_script(login_script, script)
        try:
            check_call('./' + login_script)
        except CalledProcessError as error:
            print_message("Error while creating myproxy-logon certificate")
            return error.returncode
        finally:
            # The script embeds the password, so it is removed even on failure.
            os.remove(login_script)

        map_path = os.path.join(mapsin, m)
        quoted_ini = shlex.quote(str(ini))
        quoted_map = shlex.quote(map_path)
        # "cmd || exit $?" propagates the failing command's status. The former
        # "if [ $? -ne 0 ]; then exit $?; fi" always exited 0, because inside
        # the "then" branch $? is the (successful) status of the "[" test.
        # The "--thredds-reinit" step is left unchecked, as before.
        script = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
esgpublish -i {quoted_ini} --project {project} --map {quoted_map} --no-thredds-reinit --commit-every 100 || exit $?
esgpublish -i {quoted_ini} --project {project} --map {quoted_map} --service fileservice --noscan --thredds  --no-thredds-reinit || exit $?
esgpublish --project {project} --thredds-reinit
esgpublish -i {quoted_ini} --project {project} --map {quoted_map} --service fileservice --noscan --publish || exit $?
"""

        pub_script = "pub_script.sh"
        _write_executable_script(pub_script, script)

        if debug:
            print_message(f'Running publication script: {pub_script}', 'info')
            print_message(script, 'info')

        try:
            start = datetime.now()
            check_call('./' + pub_script)
            end = datetime.now()
        except CalledProcessError:
            print_message(
                f"Error in publication, moving {m} to {mapserr}", "error")
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapserr, m))
        else:
            print_message(
                f"Publication success, runtime: {end - start}", "info")
            os.rename(
                os.path.join(mapsin, m),
                os.path.join(mapsout, m))
예제 #6
0
def update_custom(facets,
                  outpath='./custom_facets.map',
                  generate_only=False,
                  mapdir=None,
                  datadir=None,
                  debug=False):
    """Generate a custom-facet mapfile and, unless told otherwise, apply it.

    The mapfile is produced by ``generate_custom``. It is then applied with
    ``esgadd_facetvalues`` through a generated ``update_custom.sh`` script in
    the current working directory. For every line of the script's stderr that
    mentions ``Writing THREDDS catalog``, a harvest request for that catalog
    is posted with ``wget``.

    Args:
        facets: list of ``"facet_name=facet_value"`` strings.
        outpath: path of the mapfile to write and then pass to
            ``esgadd_facetvalues``.
        generate_only: when truthy, stop after writing the mapfile.
        mapdir: forwarded to ``generate_custom``.
        datadir: forwarded to ``generate_custom``.
        debug: when truthy, print the generated script and its captured output.

    Returns:
        Always 0.
    """
    print_message("Generating custom facet mapfile", 'ok')
    project = generate_custom(facets=facets,
                              outpath=outpath,
                              mapdir=mapdir,
                              datadir=datadir,
                              debug=debug)
    print_message("Mapfile generation complete", 'ok')

    if generate_only:
        return 0

    print_message("Sending custom facets to the ESGF node", 'ok')
    facet_update_string = f"""#!/bin/sh
source /usr/local/conda/bin/activate esgf-pub
esgadd_facetvalues --project {project} --map {outpath} --noscan --thredds --service fileservice"""
    if debug:
        print_message(facet_update_string, 'info')
    update_script = 'update_custom.sh'
    with open(update_script, 'w') as op:
        op.write(facet_update_string)
    st = os.stat(update_script)
    os.chmod(update_script, st.st_mode | stat.S_IEXEC)

    # Text mode makes communicate() return str; with the default byte streams
    # the err.split('\n') that used to follow raised TypeError on Python 3.
    proc = Popen(['./' + update_script],
                 shell=True,
                 stdout=PIPE,
                 stderr=PIPE,
                 universal_newlines=True)
    out, err = proc.communicate()
    if debug:
        print_message(out)
        print_message(err)

    search_string = "/esg/content/thredds/esgcet/"
    for line in err.splitlines():
        if "Writing THREDDS catalog" not in line:
            continue
        idx = line.find(search_string)
        if idx == -1:
            # The marker is present but the catalog root is not: no path to
            # harvest from this line, so report it and carry on.
            print_message(
                f"Unable to find the catalog path in: {line}", 'error')
            continue
        xml_path = line[idx + len(search_string):].strip()
        cmd = f"""wget --no-check-certificate --ca-certificate ~/.globus/certificate-file --certificate ~/.globus/certificate-file --private-key ~/.globus/certificate-file --verbose --post-data="uri=https://aims3.llnl.gov/thredds/catalog/esgcet/{xml_path}&metadataRepositoryType=THREDDS" https://esgf-node.llnl.gov/esg-search/ws/harvest"""
        print(cmd)
        # Drain and close the pipe so the request is finished and the child
        # process is reaped before moving on.
        with os.popen(cmd) as harvest:
            harvest.read()
    return 0
예제 #7
0
def publish_maps(mapfiles,
                 mapsin,
                 mapsout,
                 mapserr,
                 logpath,
                 sproket='spoket',
                 no_custom=False,
                 debug=False):
    """Run ``esgpublish`` for each mapfile and move it to ``mapsout`` or ``mapserr``.

    The dataset id is the mapfile name without its ``.map`` suffix, and the
    project is the id's first dot-separated component. For E3SM datasets the
    facets returned by ``get_facet_info`` are written to a temporary JSON file
    and passed with ``--json``, unless ``no_custom`` is set or any of the
    three facets is empty.

    Args:
        mapfiles: file names (not paths) inside ``mapsin``; names that do not
            end in ``.map`` are skipped.
        mapsin: directory holding the incoming mapfiles.
        mapsout: directory that successfully published mapfiles are moved to.
        mapserr: directory for mapfiles whose dataset already exists or whose
            publication failed.
        logpath: directory (created if needed) receiving one
            ``<datasetID>.log`` per publication with the command's stdout and
            stderr.
        sproket: forwarded to ``check_ds_exists``.
            NOTE(review): the default ``'spoket'`` looks like a typo of
            ``'sproket'``; it is left untouched because it is part of the
            signature.
        no_custom: when truthy, never attach custom facet metadata.
        debug: forwarded to ``check_ds_exists``.

    Returns:
        None.

    Raises:
        ValueError: if a dataset id starts with neither ``CMIP6`` nor ``E3SM``.
    """
    os.makedirs(logpath, exist_ok=True)
    with TemporaryDirectory() as tmpdir:

        for m in mapfiles:
            if not m.endswith('.map'):
                continue

            print_message(f"Starting publication for {m}", 'ok')

            datasetID = m[:-4]
            project = datasetID.split('.')[0]
            if check_ds_exists(datasetID, debug=debug, sproket=sproket):
                msg = f"Dataset {datasetID} already exists"
                print_message(msg, 'err')
                os.rename(os.path.join(mapsin, m), os.path.join(mapserr, m))
                continue

            # Reset per mapfile. Previously these names were unbound (E3SM with
            # no_custom or incomplete facet info) or carried over from an
            # earlier dataset in the same run.
            project_metadata = None
            project_metadata_path = None

            if project == 'CMIP6':
                project = 'cmip6'
            elif project == 'E3SM':
                if not no_custom:
                    campaign, driver, period = get_facet_info(datasetID)
                    if campaign and driver and period:
                        project_metadata_path = os.path.join(
                            tmpdir, f'{datasetID}.json')
                        project_metadata = {
                            'Campaign': campaign,
                            'Science Driver': driver,
                            'Period': period
                        }
                        with open(project_metadata_path, 'w') as op:
                            json.dump(project_metadata, op)
            else:
                raise ValueError(
                    "Unrecognized project name for mapfile: {}".format(m))

            map_path = os.path.join(mapsin, m)
            # Build the argument list directly so a path containing spaces is
            # not split into several arguments.
            cmd = ['esgpublish', '--project', project, '--map', map_path]
            if project_metadata:
                cmd.extend(['--json', project_metadata_path])

            print_message(f"Running: {' '.join(cmd)}", 'ok')
            log = os.path.join(logpath, f"{datasetID}.log")
            print_message(f"Writing publication log to {log}", 'ok')

            with open(log, 'w') as outstream:
                proc = Popen(cmd,
                             stdout=outstream,
                             stderr=outstream,
                             universal_newlines=True)
                proc.wait()

            # stderr goes to the log file, so proc.stderr is None and there is
            # nothing to echo here; the details are in the log.
            if proc.returncode != 0:
                print_message(
                    f"Error in publication, moving {m} to {mapserr}\n",
                    "error")
                os.rename(os.path.join(mapsin, m), os.path.join(mapserr, m))
            else:
                print_message(
                    f"Publication success, moving {m} to {mapsout}\n", "info")
                os.rename(os.path.join(mapsin, m), os.path.join(mapsout, m))
예제 #8
0
파일: stager.py 프로젝트: TonyB9000/esgfpub
def stage(ARGS):
    """Validate raw data, transfer it into the output tree and optionally build mapfiles.

    Args:
        ARGS: parsed command-line namespace. The attributes read here are
            ``debug``, ``over_write``, ``config`` (path to a YAML file),
            ``transfer_mode``, ``mapout`` and ``mapfile_env``.

    The YAML config must provide ``output_path``, ``model_version``,
    ``atmospheric_resolution``, ``ocean_resolution``, ``data_paths``,
    ``ensemble``, ``experiment``, ``start_year`` and ``end_year``. It may also
    provide ``non_native_grid``, ``mapfiles``, ``ini_path`` and
    ``num_workers`` (default 4).

    Returns:
        0 on success; 1 when the config cannot be parsed or is incomplete, raw
        data validation fails, the file transfer reports -1, or the run is
        interrupted from the keyboard; otherwise the status returned by the
        last ``mapfile_gen`` call (-1 if there were no paths to process).

    Raises:
        ValueError: if mapfile generation is enabled but ``ini_path`` is
            missing from the config.
    """
    debug = ARGS.debug
    overwrite = bool(ARGS.over_write)

    try:
        with open(ARGS.config, 'r') as ip:
            CONFIG = yaml.load(ip, Loader=yaml.SafeLoader)
    except yaml.YAMLError as error:
        # PyYAML reports malformed documents as YAMLError, not SyntaxError, so
        # the old handler never ran.
        print_message("Unable to parse config file, is it valid yaml?")
        print(repr(error))
        return 1

    try:
        BASEOUTPUT = CONFIG['output_path']
        MODEL_VERSION = CONFIG['model_version']
        ATMRES = CONFIG['atmospheric_resolution']
        OCNRES = CONFIG['ocean_resolution']
        DATA_PATHS = CONFIG['data_paths']
        ENSEMBLE = CONFIG['ensemble']
        EXPERIMENT_NAME = CONFIG['experiment']
        GRID = CONFIG.get('non_native_grid')
        START = int(CONFIG['start_year'])
        END = int(CONFIG['end_year'])
    except (KeyError, TypeError, ValueError) as error:
        # KeyError: an option is missing. TypeError: the document is not a
        # mapping or a year is null. ValueError: a year is not an integer.
        print_message('Unable to find values in config file')
        print(repr(error))
        return 1

    print_message('Validating raw data', 'ok')
    if not validate_raw(DATA_PATHS, START, END):
        return 1

    base_path = os.path.join(BASEOUTPUT, MODEL_VERSION)

    resdirname = "{}_atm_{}_ocean".format(ATMRES, OCNRES)
    makedir(os.path.join(base_path, EXPERIMENT_NAME, resdirname))

    transfer_mode = ARGS.transfer_mode
    mode_messages = {
        'move': 'Moving files',
        'copy': 'Copying files',
        'link': 'Linking files',
    }
    if transfer_mode in mode_messages:
        print_message(mode_messages[transfer_mode], 'ok')
    num_moved, paths = transfer_files(outpath=base_path,
                                      experiment=EXPERIMENT_NAME,
                                      grid=GRID,
                                      mode=transfer_mode,
                                      data_paths=DATA_PATHS,
                                      ensemble=ENSEMBLE,
                                      overwrite=overwrite)
    if num_moved == -1:
        return 1

    RUNMAPS = CONFIG.get('mapfiles', False)
    if RUNMAPS not in (True, 'true', 'True', 1, '1'):
        print_message('Not running mapfile generation', 'ok')
        print_message('Publication prep complete', 'ok')
        return 0
    print_message('Starting mapfile generation', 'ok')

    try:
        INIPATH = CONFIG['ini_path']
        MAPOUT = ARGS.mapout
    except (KeyError, AttributeError) as error:
        raise ValueError(
            "Mapfiles generation is turned on, but the config is missing the ini_path option"
        ) from error
    NUMWORKERS = CONFIG.get('num_workers', 4)
    event = Event()

    pbar = tqdm(desc="Generating mapfiles", total=num_moved)
    res = -1
    try:
        try:
            # NOTE(review): res keeps only the status of the last path, so a
            # failure on an earlier path is not reflected in the return value.
            for path in paths:
                res = mapfile_gen(basepath=path,
                                  inipath=INIPATH,
                                  outpath=MAPOUT,
                                  maxprocesses=NUMWORKERS,
                                  env_name=ARGS.mapfile_env,
                                  debug=debug,
                                  event=event,
                                  pbar=pbar)
        finally:
            # Close the progress bar on every exit path, including interrupts.
            pbar.close()
    except KeyboardInterrupt:
        print_message('Keyboard interrupt caught, exiting')
        event.set()
        return 1

    if res == 0:
        print_message('Publication prep complete', 'ok')
    else:
        print_message(
            'mapfile generation exited with status: {}'.format(res),
            'error')
    return res