def get_generator_name(dataset, das_dir, mcm_dir):
    """Return list of generator names used for DATASET.

    Combines the generators recorded in the dataset's own McM dict with
    those of its input (parent) dataset, strips JSON artefacts
    ('"', '\\', '[', ']') from each entry and removes duplicates while
    preserving first-seen order.
    """
    generator_names = []
    mcm_dict = get_mcm_dict(dataset, mcm_dir)
    generators = get_from_deep_json(mcm_dict, 'generators') or []

    # Also collect the generators of the input dataset (e.g. the GEN-SIM
    # step that produced this AODSIM), when one is recorded.
    input_generators = []
    dataset_json = get_das_store_json(dataset, 'mcm', das_dir)
    input_dataset = get_from_deep_json(dataset_json, 'input_dataset')
    if input_dataset:
        mcm_dict = get_mcm_dict(input_dataset, mcm_dir)
        input_generators = get_from_deep_json(mcm_dict, 'generators') or []

    # FIX: previously input_generators was merged only when the dataset
    # itself also had generators, so a dataset whose own McM dict lacked
    # 'generators' returned [] even when its input dataset had them.
    strip_table = str.maketrans('', '', '"\\[]')  # remove ", \, [, ]
    for item in generators + input_generators:
        cleaned = item.translate(strip_table)
        if cleaned not in generator_names:
            generator_names.append(cleaned)
    return generator_names
def get_prepid_from_mcm(dataset, mcm_dir):
    """Return the prepid for DATASET from the McM store, or None.

    McM records are inconsistent about the key name, so both 'prep_id'
    and 'prepid' are tried.
    """
    # FIX: fetch the McM dict once instead of twice; `is None` per PEP 8.
    mcm_dict = get_mcm_dict(dataset, mcm_dir)
    prepid = get_from_deep_json(mcm_dict, 'prep_id')
    if prepid is None:
        # try different queries from the json. prep_id?
        prepid = get_from_deep_json(mcm_dict, 'prepid')
    return prepid
def get_prepId_from_das(dataset, das_dir):
    """Return the prepid for DATASET from the DAS store, or None.

    Tries the das/dataset record ('prep_id') first, then falls back to
    the das/mcm record ('prepid').
    """
    # FIX: `is None` identity check per PEP 8 (was `== None`).
    prepid = get_from_deep_json(
        get_das_store_json(dataset, 'dataset', das_dir), 'prep_id')
    if prepid is None:
        # try to get from das/mcm:
        prepid = get_from_deep_json(
            get_das_store_json(dataset, 'mcm', das_dir), 'prepid')
        # TODO also try different queries from the json. prep_id?
    return prepid
def get_conffile_ids(dataset, das_dir):
    """Return configuration-file IDs for DATASET, duplicates removed.

    Merges the 'byoutputdataset' and 'byinputdataset' entries of the DAS
    config record; insertion order is preserved.
    """
    # FIX: fetch the DAS config JSON once instead of twice.
    config_json = get_das_store_json(dataset, 'config', das_dir)
    byoutput = get_from_deep_json(config_json, 'byoutputdataset')
    byinput = get_from_deep_json(config_json, 'byinputdataset')
    ids = {}  # dict used as an ordered set
    for someid in (byoutput or []):
        ids[someid] = 1
    for someid in (byinput or []):
        ids[someid] = 1
    return list(ids.keys())
def get_number_files(dataset, das_dir):
    """Return number of files for the dataset, or 0 when unknown."""
    das_json = get_das_store_json(dataset, 'dataset', das_dir)
    return get_from_deep_json(das_json, 'nfiles') or 0
def get_size(dataset, das_dir):
    """Return size of the dataset, or 0 when unknown."""
    das_json = get_das_store_json(dataset, 'dataset', das_dir)
    return get_from_deep_json(das_json, 'size') or 0
def get_number_events(dataset, das_dir):
    """Return number of events for the dataset, or 0 when unknown."""
    das_json = get_das_store_json(dataset, 'dataset', das_dir)
    return get_from_deep_json(das_json, 'nevents') or 0
def get_cmssw_version_from_das(dataset, das_dir):
    """Return CMSSW release version from DAS JSON.

    Returns the first 'name' entry of the DAS 'release' record, or an
    empty dict when none is present (mirrors the sibling getters).
    """
    names = get_from_deep_json(
        get_das_store_json(dataset, 'release', das_dir), 'name')
    return names[0] if names else {}
def get_generator_parameters_from_mcm(dataset, mcm_dir):
    """Return generator parameters dictionary for given dataset.

    Takes the first 'generator_parameters' entry of the McM dict;
    returns an empty dict when nothing is found.
    """
    params = get_from_deep_json(get_mcm_dict(dataset, mcm_dir),
                                'generator_parameters')
    return params[0] if params else {}
def get_global_tag(dataset, mcm_dir):
    """Return the global tag ('conditions') from the McM dict, or ''."""
    conditions = get_from_deep_json(get_mcm_dict(dataset, mcm_dir),
                                    'conditions')
    return conditions or ''
def get_cmssw_version_from_mcm(dataset, mcm_dir):
    """Return the CMSSW release from the McM dict, or ''."""
    release = get_from_deep_json(get_mcm_dict(dataset, mcm_dir),
                                 'cmssw_release')
    return release or ''
def get_generator_parameters(dataset, das_dir):
    """Return generator parameters dictionary for given dataset.

    Takes the first 'generator_parameters' entry from the das/mcm
    record; returns an empty dict when nothing is found.
    """
    # TODO get from mcm store instead? and/or from xsecDB
    params = get_from_deep_json(
        get_das_store_json(dataset, 'mcm', das_dir), 'generator_parameters')
    return params[0] if params else {}
def get_parent_dataset(dataset, das_dir):
    "Return parent dataset to the given dataset or an empty string if no parent found."
    # NOTE(review): this function is redefined later in this file; at import
    # time the later definition shadows this one — confirm which is intended.
    parent_dataset = ''
    # Only consult the DAS store when a non-empty cached 'parent' JSON file
    # exists on disk (avoids errors on missing/empty cache entries).
    filepath = das_dir + '/parent/' + dataset.replace('/', '@') + '.json'
    if os.path.exists(filepath) and os.stat(filepath).st_size != 0:
        parent_dataset = get_from_deep_json(
            get_das_store_json(dataset, 'parent', das_dir),
            'parent_dataset')
    return parent_dataset
def get_parent_dataset(dataset, das_dir):
    """Return parent dataset to the given dataset, or '' if none found.

    Lookup failures (missing or malformed DAS 'parent' record) are
    treated as "no parent" rather than an error.
    """
    parent_dataset = ''
    try:
        parent_dataset = get_from_deep_json(
            get_das_store_json(dataset, 'parent', das_dir),
            'parent_dataset')
    # FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrow to Exception while keeping the best-effort behavior.
    except Exception:
        # troubles getting information about parent
        pass
    return parent_dataset
def get_conffile_ids_from_das(dataset, das_dir, mcm_dir):
    """Return location of the configuration files for the dataset from DAS.

    Reads the 'byoutputdataset' entry of the DAS config record; reports
    an error on stderr when no ID is found. (mcm_dir is accepted for
    interface compatibility but not used here.)
    """
    config_json = get_das_store_json(dataset, 'config', das_dir)
    byoutput = get_from_deep_json(config_json, 'byoutputdataset')
    unique_ids = {}  # dict used as an ordered set
    if byoutput:
        for conf_id in byoutput:
            unique_ids[conf_id] = 1
    else:
        print("Error: No config id found from DAS config for " + dataset,
              file=sys.stderr)
    return list(unique_ids.keys())
def get_dataset_energy(dataset, mcm_dir):
    "Return energy of that dataset in TeV"
    mcm_dict = get_mcm_dict(dataset, mcm_dir)
    if not mcm_dict:
        # No McM record: fall back to a year-based lookup.
        # NOTE: the fallback returns 0 (an int) for unknown years while the
        # other paths return strings — callers should be aware.
        energies = {
            2010: '7TeV',
            2011: '7TeV',
            2012: '8TeV',
            2015: '13TeV',
            2016: '13TeV',
        }
        return energies.get(get_dataset_year(dataset), 0)
    energy = get_from_deep_json(mcm_dict, 'energy')
    if isinstance(energy, str):
        # Already a string in McM; returned as-is (no 'TeV' suffix added).
        return energy
    # Numeric energy: drop a trailing '.0' and append the unit.
    return str(energy).replace('.0', '') + 'TeV'
def get_genfragment_url(dataset, mcm_dir, das_dir):
    """Return list of URLs of the gen fragments used, or None.

    For AODSIM datasets the cmsDriver script of the input (GEN-SIM)
    dataset is scanned; otherwise the dataset's own script is used.
    Every line containing 'curl' contributes one URL. Returns None when
    no cmsDriver script is cached for the dataset.
    """
    url = []
    # get GEN-SIM dataset
    if get_dataset_format(dataset) == 'AODSIM':
        dataset_json = get_das_store_json(dataset, 'mcm', das_dir)
        input_dataset = get_from_deep_json(dataset_json, 'input_dataset')
    else:
        input_dataset = dataset
    script_path = get_cmsDriver_script(input_dataset, mcm_dir)
    # FIX: identity comparison with None (was `== None`).
    if script_path is None:
        return None
    with open(script_path, 'r') as script:
        for line in script:
            if 'curl' in line:
                # FIX: raw string so the \s escape in the regex is explicit.
                curl = re.search(r'(?P<url>https?://[^\s]+)', line)
                if curl:
                    url.append(curl.group('url'))
    return url
def get_conffile_ids_from_mcm(dataset, das_dir, mcm_dir):
    """Return location of the configuration files for the dataset from McM.

    Reads 'config_id' from the McM dict. (das_dir is accepted for
    interface compatibility but not used here.)
    """
    return get_from_deep_json(get_mcm_dict(dataset, mcm_dir), 'config_id')
def get_parent_dataset_from_mcm(dataset, das_dir, mcm_dir):
    "Return parent dataset to given DATASET from McM."
    # 'input_dataset' in the McM record is the parent in the production
    # chain. (das_dir is accepted for interface compatibility, unused.)
    return get_from_deep_json(get_mcm_dict(dataset, mcm_dir), 'input_dataset')
def get_pileup_from_mcm(dataset, mcm_dir):
    """Return pileup_dataset_name for given dataset."""
    return get_from_deep_json(get_mcm_dict(dataset, mcm_dir),
                              'pileup_dataset_name')
def print_ancestor_information(dataset, das_dir, mcm_dir, recid_file, doi_info):
    "All the information we have so far"
    # Prints a Markdown-style summary of everything known about DATASET:
    # record ID, DOI, prepid, global tag, CMSSW version, energy, generators,
    # input dataset, gen fragment URLs, generator parameters, cmsDriver
    # scripts, config files and pile-up info. Also updates module-level
    # statistics counters as a side effect; returns nothing.
    # everything should be a sublist item (4 spaces of indentation):
    # - dataset_name
    #     - info
    # TODO add to this function:
    # - config files present
    #     - step GEN
    #     - step RECO
    #     - step HLT
    # - gen_parameters:
    #     - cross section from XSECDB.
    #       see github issue opendata.cern.ch#1137
    #       ideally we should make a local cache of that.
    # - LHE stuff?
    # - Data popularity from github.com/katilp/cms-data-popularity
    #   ideally we should make a local cache of that.
    # it would be very nice if this printer script needed not external (non cached) information

    # record ID as in OpenData portal
    # TODO move this code to other place, no need to open a file everytime
    # NOTE(review): exec() trusts the recid file's content completely — fine
    # for a local curation script, but confirm it is never user-supplied.
    RECID_INFO = {}
    _locals = locals()
    exec(open(recid_file, 'r').read(), globals(), _locals)
    RECID_INFO = _locals['RECID_INFO']

    try:
        recid = RECID_INFO[dataset]
        print(" - Record ID: [{recid}]({url})".format(
            recid=recid,
            url='http://opendata.cern.ch/record/' + str(recid)))
    # NOTE(review): bare except silently skips datasets without a recid
    except:
        pass

    # DOI
    doi = get_doi(dataset, doi_info)
    if doi:
        print(" - DOI: [{doi}]({url})".format(
            doi=doi, url='https://doi.org/' + str(doi)))

    # PrepId: try DAS first, then fall back to McM
    prepid = get_prepId_from_das(dataset, das_dir)
    if not prepid:
        prepid = get_prepid_from_mcm(dataset, mcm_dir)
    if prepid:
        print(" - PrepId: [{prepid}]({url})".format(
            prepid=prepid,
            url='https://cms-pdmv.cern.ch/mcm/requests?prepid=' + str(prepid)))

    # global tag & cmssw version
    global_tag = get_global_tag(dataset, mcm_dir)
    cmssw_ver = get_cmssw_version(dataset, mcm_dir)
    if global_tag:
        print(" - Global Tag:", global_tag)
    if cmssw_ver:
        print(" - CMSSW version:", cmssw_ver)

    # Energy
    # NOTE(review): get_dataset_energy may already return a 'TeV'-suffixed
    # string, in which case this line prints the unit twice — confirm.
    print(" - Collision Energy: ", get_dataset_energy(dataset, mcm_dir), "TeV")

    # Generators
    generators = get_generator_name(dataset, das_dir, mcm_dir)
    if generators:
        print(" - Generators: ", generators)

    # GEN-SIM dataset used to produce the AODSIM
    dataset_json = get_das_store_json(dataset, 'mcm', das_dir)
    input_dataset = get_from_deep_json(dataset_json, 'input_dataset')
    if input_dataset:
        print(" - Input Dataset:", input_dataset)
        input_global_tag = get_global_tag(input_dataset, mcm_dir)
        input_cmssw_ver = get_cmssw_version(input_dataset, mcm_dir)
        if input_global_tag:
            print(" - Global Tag:", input_global_tag)
        if input_cmssw_ver:
            print(" - CMSSW version:", input_cmssw_ver)

    gen_fragment = get_genfragment_url(dataset, mcm_dir, das_dir)
    if gen_fragment:
        for url in gen_fragment:
            print(" - Gen Fragment: [{url}]({url})".format(url=url))

    # gen parameters of input dataset
    generator_parameters = get_generator_parameters(dataset, das_dir)
    if generator_parameters:
        print(' - Generator parameters:')
        print(' - Cross section:',
              generator_parameters.get('cross_section', None))
        print(' - Filter efficiency:',
              generator_parameters.get('filter_efficiency', None))
        print(' - Filter efficiency error:',
              generator_parameters.get('filter_efficiency_error', None))
        print(' - Match efficiency:',
              generator_parameters.get('match_efficiency', None))
        print(' - Match efficiency error:',
              generator_parameters.get('match_efficiency_error', None))

    # mcm scripts with cmsDriver instructions
    cmsDriver1 = get_cmsDriver_script(input_dataset, mcm_dir)
    cmsDriver2 = get_cmsDriver_script(dataset, mcm_dir)
    # module-level statistics counters, updated as a side effect
    global DATASETS_WITH_BOTH_CMSDRIVER
    global DATASETS_WITH_CMSDRIVER1
    global DATASETS_WITH_CMSDRIVER2
    if cmsDriver1 or cmsDriver2:
        print(" - cmsDriver scripts:")
    if cmsDriver1:
        print(' - GEN-SIM:', cmsDriver1)
        DATASETS_WITH_CMSDRIVER1 += 1
    if cmsDriver2:
        print(' - RECO-HLT:', cmsDriver2)
        DATASETS_WITH_CMSDRIVER2 += 1
    if cmsDriver1 and cmsDriver2:
        DATASETS_WITH_BOTH_CMSDRIVER += 1

    # python config files: collect for the dataset and all of its ancestors
    conffile_ids = get_conffile_ids(dataset, das_dir)
    parent = get_parent_dataset(dataset, das_dir)
    while parent != '' and parent:
        conffile_ids += get_conffile_ids(parent, das_dir)
        parent = get_parent_dataset(parent, das_dir)
    global DATASETS_WITH_3CONFFILES
    if conffile_ids:
        print(" - python config scripts: ", conffile_ids)
        if len(conffile_ids) > 2:
            DATASETS_WITH_3CONFFILES += 1
    global DATASETS_WITH_FULL_PROVENANCE
    if (cmsDriver1 and cmsDriver2) or len(conffile_ids) > 2:
        DATASETS_WITH_FULL_PROVENANCE += 1

    # pile up information
    mcm_dict = get_mcm_dict(dataset, mcm_dir)
    if mcm_dict:
        pileup = get_from_deep_json(mcm_dict, 'pileup')
        pileup_dataset = get_from_deep_json(mcm_dict, 'pileup_dataset_name')
        if pileup or pileup_dataset:
            print(' - pile-up:')
            if pileup:
                print(' -', pileup)
            if pileup_dataset:
                print(' -', pileup_dataset)
        notes = get_from_deep_json(mcm_dict, 'notes')
        if notes != None:
            # some notes have several lines, this makes the markdown use them
            # in the same item list
            print(' - notes:', notes.replace('\n', '\n '))
def mcm_downloader(prepid, dataset, mcm_dir, das_dir):
    "Query dictionary and setup script from McM database"
    # Downloads and caches, for DATASET and (when available) its input
    # dataset: the McM dictionary under mcm_dir/dict/ and the setup script
    # under mcm_dir/scripts/. Failures are reported on stderr; no return
    # value.
    # this function is so ugly... but finally works! You're welcome to refactor it though
    cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/{query}/{prepId}"
    # As prep_id in DAS for some datasets can be found with underscores and MCM
    # takes without underscores, we need to process prep_id removing all of them
    if "_" in prepid:
        print("Found some underscores in prep_id: " + prepid + ", removing...")
        prepid = prepid.replace("_", "")
    # NOTE(review): shell=True with interpolated identifiers — acceptable for
    # curated dataset names, but confirm these never come from untrusted input.
    mcm_dict = subprocess.run(cmd.format(query="get", prepId=prepid),
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    mcm_script = subprocess.run(cmd.format(query="get_setup", prepId=prepid),
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    mcm_dict_out = str(mcm_dict.stdout.decode("utf-8"))
    mcm_script_out = str(mcm_script.stdout.decode("utf-8"))
    # check if results are not empty
    if mcm_dict_out == '{"results": {}}\n':
        print("[ERROR] Empty McM dict (get) for {ds}".format(ds=dataset),
              file=sys.stderr)
    else:
        outfile = mcm_dir + "/dict/" + dataset.replace('/', '@') + ".json"
        with open(outfile, 'w') as dict_file:
            dict_file.write(mcm_dict_out)
    # a body starting with '{' from get_setup is a JSON error payload,
    # not a shell script
    if mcm_script_out == '' or mcm_script_out[0] == '{':
        print("[ERROR] Empty McM script (get_setup) for {ds}".format(ds=dataset),
              file=sys.stderr)
    else:
        outfile = mcm_dir + "/scripts/" + dataset.replace('/', '@') + ".sh"
        with open(outfile, 'w') as dict_file:
            dict_file.write(mcm_script_out)
    # same thing for "input_dataset": hopefully the GEN-SIM step
    dataset_json = get_das_store_json(dataset, 'mcm', das_dir)
    input_dataset = get_from_deep_json(dataset_json, 'input_dataset')  # /bla/ble/GEN-SIM
    if input_dataset:
        # [1:] drops the dataset's leading '/' for the 'produces' query URL
        mcm_dict = subprocess.run(cmd.format(query="produces", prepId=input_dataset[1:]),
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        mcm_out = str(mcm_dict.stdout.decode("utf-8"))
        # check if results are not empty
        if mcm_out == '{"results": {}}' or mcm_out == '{"results": {}}\n':
            print("[ERROR] Empty McM dict (get) for {ds}".format(ds=input_dataset),
                  file=sys.stderr)
        else:
            outfile = mcm_dir + "/dict/" + input_dataset.replace('/', '@') + ".json"
            with open(outfile, 'w') as dict_file:
                dict_file.write(mcm_out)
            # get_prepid_from_mcm reads the dict file just written above
            prepid = get_prepid_from_mcm(input_dataset, mcm_dir)
            if prepid != None:
                mcm_script = subprocess.run(cmd.format(query="get_setup", prepId=prepid),
                                            shell=True,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                if mcm_script.stdout.decode("utf-8")[0] == '{':
                    print("[ERROR] Empty McM script (get_setup) for {ds}".format(ds=input_dataset),
                          file=sys.stderr)
                else:
                    outfile = mcm_dir + "/scripts/" + input_dataset.replace('/', '@') + ".sh"
                    with open(outfile, 'w') as dict_file:
                        dict_file.write(mcm_script.stdout.decode("utf-8"))
            else:
                print("[ERROR] No prep_id in McM Store for record {ds}".format(ds=input_dataset),
                      file=sys.stderr)
    else:
        print("[ERROR] No input_dataset in das_store/mcm for record {ds}".format(ds=dataset),
              file=sys.stderr)
def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir):
    """Return DICT with all information about the generator steps."""
    # Walks the production chain from DATASET back through its parents,
    # building one "step" dict per dataset (release, global tag, generators,
    # configuration files, step type), then reverses the list into
    # production order and applies two post-generation fixes.
    info = {}
    info["description"] = "<p>These data were generated in several steps (see also <a href=\"/docs/cms-mc-production-overview\">CMS Monte Carlo production overview</a>):</p>"
    info["steps"] = []
    input_dataset = dataset
    while input_dataset:
        step = {}
        process = ''
        step['output_dataset'] = input_dataset
        # NOTE(review): get_cmssw_version is called with three arguments here
        # but with two elsewhere in this file — confirm which signature is
        # current.
        release = get_cmssw_version(input_dataset, das_dir, mcm_dir)
        if release:
            step['release'] = release
        global_tag = get_global_tag(input_dataset, mcm_dir)
        if global_tag:
            step['global_tag'] = global_tag
        # cmsDriver production script, if cached locally
        cmsdriver_path = get_cmsDriver_script(input_dataset, mcm_dir)
        step['configuration_files'] = []
        if cmsdriver_path:
            with open(cmsdriver_path) as myfile:
                configuration_files = {}
                configuration_files['title'] = 'Production script'
                configuration_files['script'] = myfile.read()
                if configuration_files:
                    step['configuration_files'].append(configuration_files)
        generator_names = get_generator_name(input_dataset, das_dir, mcm_dir)
        if generator_names:
            step['generators'] = generator_names
        # generator fragment(s): fetched from their URL when reachable
        gen_fragment = get_genfragment_url(input_dataset, mcm_dir, das_dir)
        if gen_fragment:
            for url in gen_fragment:
                configuration_files = {}
                configuration_files['title'] = 'Generator parameters'
                configuration_files['url'] = url
                try:
                    script = requests.get(url, verify=False).text
                    configuration_files['script'] = script
                # NOTE(review): bare except — fragment download is best-effort
                except:
                    pass
                if configuration_files:
                    step['configuration_files'].append(configuration_files)
        # ConfDB configuration files; also used to build the process type
        # NOTE(review): called with three arguments, while get_conffile_ids
        # earlier in this file takes two — confirm which version applies.
        config_ids = get_conffile_ids(input_dataset, das_dir, mcm_dir)
        if config_ids:
            for config_id in config_ids:
                afile = config_id + '.configFile'
                proc = get_process(afile, conf_dir)
                # accumulate the space-separated process type string
                if process:
                    process += " " + proc
                else:
                    process += proc
                configuration_files = {}
                configuration_files['title'] = 'Configuration file'
                configuration_files['process'] = proc
                configuration_files['cms_confdb_id'] = config_id
                globaltag = get_globaltag_from_conffile(afile, conf_dir)
                # only fill global_tag when McM did not already provide one
                if not 'global_tag' in step:
                    step['global_tag'] = globaltag
                step['configuration_files'].append(configuration_files)
        # if we couldn't detect process type from config files, try guessing
        # via extension:
        if not process:
            if input_dataset.endswith('/LHE'):
                process = 'LHE'
            elif input_dataset.endswith('/SIM'):
                process = 'SIM'
            elif input_dataset.endswith('/GEN-SIM'):
                process = 'SIM'
        #if process == 'LHE':
        #    step['note'] = "To get the exact generator parameters, please see <a href=\"/docs/cms-mc-production-overview#finding-the-generator-parameters\">Finding the generator parameters</a>."
        step['type'] = process
        # For cases where SIM and LHE steps are done together
        datatier = get_from_deep_json(get_mcm_dict(input_dataset, mcm_dir),
                                      'datatier')
        if datatier == ["GEN-SIM", "LHE"]:
            step['type'] = "LHE SIM"
            for i, configuration_files in enumerate(step['configuration_files']):
                if configuration_files['title'] == 'Generator parameters':
                    step['configuration_files'][i]['title'] = 'Hadronizer parameters'
        info["steps"].append(step)
        # find parent dataset, first via DAS, then via McM
        input_dataset_das = get_parent_dataset(input_dataset, das_dir)
        input_dataset_mcm = get_parent_dataset_from_mcm(input_dataset,
                                                        das_dir, mcm_dir)
        if input_dataset_mcm == 'Default':  # workaround McM bugs
            input_dataset_mcm = ''
        if input_dataset_das:
            input_dataset = input_dataset_das
        else:
            input_dataset = input_dataset_mcm
    # reverse order of steps for provenance
    info['steps'].reverse()
    # post-generation fix: if we have LHE step, let's modify the
    # configuration file titles for other steps:
    lhe_present = False
    for step in info['steps']:
        if lhe_present:
            for configuration_file in step.get('configuration_files'):
                if configuration_file['title'] == 'Generator parameters':
                    configuration_file['title'] = 'Hadronizer parameters'
        if 'LHE' in step['type']:
            lhe_present = True
    # post-generation fix: keep generators only for the first step, remove
    # from others:
    generators_present = False
    for step in info['steps']:
        if generators_present:
            if 'generators' in step:
                del (step['generators'])
        else:
            if 'generators' in step:
                generators_present = True
    return info
def mcm_downloader(dataset, mcm_dir, das_dir):
    """Query dictionary and setup script from McM database.

    Caches the McM dict under mcm_dir/dict/ and the setup script under
    mcm_dir/scripts/. Skips datasets already cached. The prepid is taken
    from the McM 'produces' answer when available, otherwise from the
    DAS store; without a prepid nothing can be downloaded.
    """
    filepath = mcm_dir + "/dict/" + dataset.replace('/', '@') + ".json"
    if os.path.exists(filepath) and os.stat(filepath).st_size != 0:
        print("==> " + dataset + "\n==> Already exist. Skipping...")
        return
    cmd = "curl -s -k https://cms-pdmv.cern.ch/mcm/public/restapi/requests/"
    mcm_dict = subprocess.run(cmd + "produces" + dataset,
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    mcm_dict_out = str(mcm_dict.stdout.decode("utf-8"))
    prepid = None
    # FIX: the original guard used `or` between the two `!=` tests, which is
    # always true; with `and` the JSON is only parsed when the 'produces'
    # query actually returned something. The DAS fallback below still runs
    # whenever the prepid could not be extracted.
    if mcm_dict_out != '{"results": {}}\n' and mcm_dict_out != '{"results": {}}':
        # get prepid from mcm/dataset; key name varies between records
        prepid = get_from_deep_json(json.loads(mcm_dict_out), "prepid")
        if prepid is None:
            prepid = get_from_deep_json(json.loads(mcm_dict_out), "prep_id")
    if prepid is None:
        prepid = get_prepId_from_das(dataset, das_dir)
    if prepid is None:
        print("Error: prepid not found in mcm, das, and das/mcm for " + dataset +
              "\n==> Skipping dataset McM dict and script",
              file=sys.stderr)
        return
    # check if McM dict is empty try to get it by das prepid ( /get/perpid instead of /produces/dataset)
    if mcm_dict_out == '{"results": {}}\n' or mcm_dict_out == '{"results": {}}':
        mcm_dict = subprocess.run(cmd + "get/" + prepid,
                                  shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        mcm_dict_out = str(mcm_dict.stdout.decode("utf-8"))
    # check if it is still empty (then there is no way to get dataset McM dict)
    if mcm_dict_out == '{"results": {}}\n' or mcm_dict_out == '{"results": {}}':
        print("[ERROR] Empty McM dict (get) for {ds} \n with prepid {pd}".format(
            ds=dataset, pd=prepid), file=sys.stderr)
    else:
        outfile = mcm_dir + "/dict/" + dataset.replace('/', '@') + ".json"
        with open(outfile, 'w') as dict_file:
            dict_file.write(mcm_dict_out)
    # setup script; a body starting with '{' is a JSON error payload,
    # not a shell script
    mcm_script = subprocess.run(cmd + "get_test/" + prepid,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    mcm_script_out = str(mcm_script.stdout.decode("utf-8"))
    if mcm_script_out == '' or mcm_script_out[0] == '{':
        print("[ERROR] Empty McM script (get_test) for {ds}".format(ds=dataset),
              file=sys.stderr)
    else:
        outfile = mcm_dir + "/scripts/" + dataset.replace('/', '@') + ".sh"
        with open(outfile, 'w') as dict_file:
            dict_file.write(mcm_script_out)