Example #1
0
def auto_select_extension(multiple_selection, multiple_selection_from_file,
                          multiple_selection_filename):
    for search_string in multiple_selection:

        info = multiple_selection[search_string]
        if len(info['paths']) != 2: continue
        if len(info['paths'][0]) > len(info['paths'][1]):
            shorter_path = info['paths'][1]
            longer_path = info['paths'][0]
        else:
            shorter_path = info['paths'][0]
            longer_path = info['paths'][1]

        add_string = ''
        remove_string = ''
        for index, key in enumerate(difflib.ndiff(shorter_path, longer_path)):
            if key[0] == ' ': continue  # unchanged character in the diff
            elif key[0] == '-':
                #print(u'Delete "{}" from position {}'.format(key[-1],index))
                remove_string += key[-1]
            elif key[0] == '+':
                #print(u'Add "{}" to position {}'.format(key[-1],index))
                add_string += key[-1]

        # Found extension
        if remove_string == '' and add_string == '_ext1':
            if search_string in multiple_selection_from_file:
                print('[Info] ' + multiple_selection_filename +
                      ' already has ' + search_string)
                continue

            common_name = get_common_name(info['paths'])
            print('Auto-selecting below paths with COMMON_PATH: ' +
                  common_name)
            print('  ' + shorter_path.replace(common_name, '|COMMON_PATH|'))
            print('  ' + longer_path.replace(common_name, '|COMMON_PATH|'))
            #multiple_selection[search_string] = {'paths': info['paths'], 'selected_paths': info['paths'], 'reason': 'Auto-selected for extension file.'}
            update_selection(search_string,
                             multiple_selection[search_string]['paths'],
                             [0, 1], 'Auto-selected for extension file.',
                             multiple_selection_from_file)
            nested_dict.save_json_file(multiple_selection_from_file,
                                       multiple_selection_filename)
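
# Hedged sketch (not part of the original module): a minimal standalone
# illustration of the difflib.ndiff character-diff technique used in
# auto_select_extension to detect when two paths differ only by an '_ext1'
# suffix. The example paths below are made up.
import difflib

def diff_strings(shorter_path, longer_path):
    """Return (removed_chars, added_chars) between the two strings."""
    remove_string = ''
    add_string = ''
    for key in difflib.ndiff(shorter_path, longer_path):
        if key[0] == ' ':  # unchanged character
            continue
        if key[0] == '-':
            remove_string += key[-1]
        elif key[0] == '+':
            add_string += key[-1]
    return remove_string, add_string

# Expect ('', '_ext1') for a pure extension dataset path.
print(diff_strings('/TTJets/RunIISummer16/NANOAODSIM',
                   '/TTJets_ext1/RunIISummer16/NANOAODSIM'))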
Example #2
0
def make_dataset_files_jsons(path_datasets_filename,
                             out_dataset_files_info_filename):
    list_dataset = dataset_files.get_list_dataset(path_datasets_filename)
    # Get files for each dataset
    dataset_file_commands = dataset_files.make_dataset_file_commands(
        list_dataset)
    #dataset_file_commands = [[dataset, commands]]
    dataset_file_results = dataset_files.run_list_command(
        dataset_file_commands)
    #datasets_files_info[dataset][filename] = {'number_events':number_events}
    dataset_files_info = dataset_files.parse_dataset_file_results(
        dataset_file_results)

    # Get meta for each file
    dataset_meta_commands = dataset_files.make_dataset_meta_commands(
        dataset_files_info)
    dataset_meta_results = dataset_files.run_list_command(
        dataset_meta_commands)
    dataset_files.parse_dataset_meta_results(dataset_meta_results,
                                             dataset_files_info)
    nested_dict.save_json_file(dataset_files_info,
                               out_dataset_files_info_filename)
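
# Hedged sketch (illustration only): the nested structure that the pipeline in
# make_dataset_files_jsons is expected to produce, based on the inline
# comments above. The dataset and file names are made up.
dataset_files_info_example = {
    '/TTJets/RunIISummer16/NANOAODSIM': {
        'file_1.root': {'number_events': 150000},
        'file_2.root': {'number_events': 98000},
    },
}
# Sum the events per dataset over its files.
for dataset, files in dataset_files_info_example.items():
    total_events = sum(info['number_events'] for info in files.values())
    print(dataset, 'has', total_events, 'events')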
Example #3
0
def update_datasets_files_json(path_datasets_filename,
                               in_dataset_files_info_filename,
                               out_dataset_files_info_filename):
    list_dataset = dataset_files.get_list_dataset(path_datasets_filename)
    #dataset_files_info[dataset][filename] = {'number_events':number_events}
    in_dataset_files_info = nested_dict.load_json_file(
        in_dataset_files_info_filename)

    in_list_dataset = in_dataset_files_info.keys()
    append_list_dataset = list(set(list_dataset) - set(in_list_dataset))
    remove_list_dataset = list(set(in_list_dataset) - set(list_dataset))
    # Get files for each dataset
    append_dataset_file_commands = dataset_files.make_dataset_file_commands(
        append_list_dataset)
    #dataset_file_commands = [[dataset, commands]]
    append_dataset_file_results = dataset_files.run_list_command(
        append_dataset_file_commands)
    #datasets_files_info[dataset][filename] = {'number_events':number_events}
    append_dataset_files_info = dataset_files.parse_dataset_file_results(
        append_dataset_file_results)

    # Get meta for each file
    append_dataset_meta_commands = dataset_files.make_dataset_meta_commands(
        append_dataset_files_info)
    append_dataset_meta_results = dataset_files.run_list_command(
        append_dataset_meta_commands)
    dataset_files.parse_dataset_meta_results(append_dataset_meta_results,
                                             append_dataset_files_info)

    remove_dataset_files_info(in_dataset_files_info, remove_list_dataset)
    out_dataset_files_info = combine_dataset_files_info(
        in_dataset_files_info, append_dataset_files_info)

    print('appended list_dataset: ', str(append_list_dataset))
    print('removed list_dataset: ', str(remove_list_dataset))

    nested_dict.save_json_file(out_dataset_files_info,
                               out_dataset_files_info_filename)
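
# Hedged sketch (illustration only): the set-difference bookkeeping used in
# update_datasets_files_json, shown with made-up dataset names. Datasets in
# the new list but not in the existing JSON are appended; datasets only in
# the JSON are removed.
list_dataset_example = ['/A/NANOAODSIM', '/B/NANOAODSIM', '/C/NANOAODSIM']
in_list_dataset_example = ['/A/NANOAODSIM', '/D/NANOAODSIM']
append_list = list(set(list_dataset_example) - set(in_list_dataset_example))
remove_list = list(set(in_list_dataset_example) - set(list_dataset_example))
print('to append:', sorted(append_list))  # ['/B/NANOAODSIM', '/C/NANOAODSIM']
print('to remove:', sorted(remove_list))  # ['/D/NANOAODSIM']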
Example #4
0
    mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
    datasets.check_false_none_mc_datasets(mc_datasets)

    # Make meta data
    path_to_keys_mc_datasets = datasets.get_path_to_keys_mc_datasets(mc_datasets)
    search_string_to_keys_mc_datasets = datasets.get_search_string_to_keys_mc_datasets(mc_tag_meta, mc_datasets)
    same_parent_paths = datasets.get_same_parent_paths(mc_datasets)
    multiple_mc_datasets = datasets.get_multiple_mc_datasets(mc_datasets)
    mini_to_nanos_from_nanoaod = datasets.get_mini_to_nanos_from_nanoaod_mc_datasets(mc_datasets)
    nano_to_mini_from_miniaod = datasets.get_nano_to_mini_from_miniaod_mc_datasets(mc_datasets)

  if make_data_datasets:
    data_datasets = nested_dict.load_json_file(data_datasets_filename)
    datasets.check_false_none_data_datasets(data_datasets)
    datasets.print_multiple_data_datasets(data_datasets)
    nested_dict.save_json_file(data_datasets, selected_data_datasets_filename)

  if make_mc_datasets:
    # multiple_selection[search_string]= {'paths':[paths], 'selected_paths':[path_selection], 'reason':reason}
    # Load multiple_selection from file
    multiple_selection_from_file = {}
    if os.path.exists(multiple_selection_filename):
      multiple_selection_from_file = nested_dict.load_json_file(multiple_selection_filename)
    # Load multiple_selection from json
    multiple_selection = get_multiple_selection(multiple_mc_datasets, data_tiers)

    # Automatically select extension ("_ext1") datasets.
    auto_select_extension(multiple_selection, multiple_selection_from_file, multiple_selection_filename)
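
    # Hedged sketch (illustration only): the multiple_selection layout
    # described in the comment above, with made-up keys and paths. Each search
    # string maps to its candidate paths, the paths selected so far, and the
    # reason for the selection.
    multiple_selection_example = {
        'TTJets__RunIISummer16__NanoAODv5': {
            'paths': ['/TTJets/RunIISummer16/NANOAODSIM',
                      '/TTJets_ext1/RunIISummer16/NANOAODSIM'],
            'selected_paths': ['/TTJets/RunIISummer16/NANOAODSIM',
                               '/TTJets_ext1/RunIISummer16/NANOAODSIM'],
            'reason': 'Auto-selected for extension file.',
        },
    }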

    # Show previous selections
    print('--------')
Example #5
0
  initialize_arguments(args)
  valid, log = are_arguments_valid(args)
  if not valid:
    print('[Error] '+log)
    sys.exit()

  queue = ucsb_queue.ucsb_queue()

  #jobs_info_filename = 'jsons/submitted_test_mc_jobs_info.json'
  #output_json = 'jsons/checked_test_mc_jobs_info.json'
  #jobscript_check_filename = './copy_aods_check_entries.py'
  #statuses = ['submitted']

  jobs_info_filename = args['jobs_info_filename']
  output_json = args['output_json']
  jobscript_check_filename = args['jobscript_check_filename']
  statuses = args['statuses']

  # Checks the jobs
  # jobs_info = [{'command_script':command_script, 'other_global_key':other_global_key, 'ignore_keys':['job_id', 'job_status', ...]},{'key_for_job':key_for_job},{'key_for_job':key_for_job},...]
  jobs_info = nested_dict.load_json_file(jobs_info_filename)

  # Each job type should make job_script, and job_check_script
  # Running "./job_check_script job_log_string" should return 'success' or 'fail' for that job_log_string
  # statuses: [status], where status = 'submitted', 'done', 'fail', 'success', 'to_submit'
  queue.check_jobs(jobs_info, statuses, jobscript_check_filename, args['debug'])
  #queue.check_jobs(jobs_info, ['submitted', 'done', 'fail', 'success', 'to_submit'], jobscript_check_filename)
  queue.print_jobs_status(jobs_info)

  nested_dict.save_json_file(jobs_info, output_json)
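
  # Hedged sketch (illustration only): the jobs_info layout described in the
  # comment above, with made-up values. Entry 0 holds the global keys; each
  # later entry describes one job.
  jobs_info_example = [
      {'command_script': './job_script.py',
       'ignore_keys': ['job_id', 'job_status']},
      {'command': './job_script.py --dataset /A/NANOAODSIM',
       'job_id': '1001', 'job_status': 'submitted'},
      {'command': './job_script.py --dataset /B/NANOAODSIM',
       'job_id': '1002', 'job_status': 'done'},
  ]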
Example #6
0
    bad_ps_weights_mc_datasets = get_unrejected_if_possible_mc_datasets(filtered_mc_datasets, reject_string_ignore_case_mc_datasets, 'PSweights')
    print('Using ps_weights for the datasets below, because no other datasets exist')
    datasets.print_path_mc_datasets(bad_ps_weights_mc_datasets)

    pu_filtered_mc_datasets = filter_if_possible_mc_datasets(filtered_mc_datasets, reject_bad_pu_2017_mc_datasets)
    #datasets.print_path_mc_datasets(pu_filtered_mc_datasets)
    #datasets.print_multiple_mc_datasets(pu_filtered_mc_datasets)
    #datasets.print_incomplete_parent_mc_datasets(pu_filtered_mc_datasets)
    datasets.print_missing_mc_datasets(keys_mc_datasets, pu_filtered_mc_datasets)

    bad_pu_mc_datasets = get_unrejected_if_possible_mc_datasets(filtered_mc_datasets, reject_bad_pu_2017_mc_datasets)
    #datasets.print_path_parent_mc_datasets(bad_pu_mc_datasets)
    print('Using bad pileup for the datasets below, because no other datasets exist')
    datasets.print_path_mc_datasets(bad_pu_mc_datasets)

    nested_dict.save_json_file(pu_filtered_mc_datasets, out_filtered_mc_datasets_filename)
    nested_dict.save_json_file(bad_pu_mc_datasets, out_bad_pu_mc_datasets_filename)
    nested_dict.save_json_file(bad_ps_weights_mc_datasets, out_ps_weight_mc_datasets_filename)
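
    # Hedged sketch (illustration only, not the project's implementation): the
    # "filter if possible" idea suggested by the calls and messages above. A
    # reject test drops unwanted paths, but when every candidate for a sample
    # would be rejected, the rejected paths are kept so the sample is not
    # lost. All names below are made up.
    def reject_old_pmx(path):
        # Stand-in for a reject test such as reject_bad_pu_2017_mc_datasets.
        return 'new_pmx' not in path

    def filter_if_possible(paths_per_sample, reject):
        kept = {}
        for sample, paths in paths_per_sample.items():
            good = [path for path in paths if not reject(path)]
            # Fall back to the rejected paths when nothing else is available.
            kept[sample] = good if good else list(paths)
        return kept

    example_paths = {'TTJets': ['/TTJets_new_pmx/NANOAODSIM'],
                     'WJets': ['/WJets/NANOAODSIM']}
    print(filter_if_possible(example_paths, reject_old_pmx))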

  if make_data_datasets:
    # data_dataset[stream][year][run_group][data_tier][path] = {"parent_chain":[], "children":[], "creation time":string, "size":int, "files":int, "events":int, "runs":[]}
    data_datasets = nested_dict.load_json_file(data_datasets_filename)
    datasets.check_overlapping_paths_data_datasets(data_datasets)

    #print_multiple_data_datasets(data_datasets)
    # keys_data_datasets = [ [stream, year, run_group, data_tier, search_string] ]
    keys_data_datasets = datasets.get_keys_data_datasets(data_tag_meta, data_tiers)
    #nested_dict.remove_key_nested_dict(data_datasets, '/SingleElectron/Run2017C-31Mar2018-v1/MINIAOD')
    datasets.print_missing_data_datasets(keys_data_datasets, data_datasets)

    filtered_data_datasets = filter_data_datasets(data_datasets, reject_string_ignore_case_path_parent_data_datasets, 'pilot')
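
    # Hedged sketch (illustration only): the data_dataset layout described in
    # the comment above, with made-up values.
    data_datasets_example = {
        'MET': {'2017': {'C': {'miniaod': {
            '/MET/Run2017C-31Mar2018-v1/MINIAOD': {
                'parent_chain': [], 'children': [],
                'creation time': '2018-04-01', 'size': 1234567890,
                'files': 42, 'events': 1000000, 'runs': [299368],
            },
        }}}},
    }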
Example #7
0
    args = vars(parser.parse_args())
    argparse_helper.initialize_arguments(args, list_args=['mc_data'])
    valid, log = are_arguments_valid(args)
    if not valid:
        print('[Error] ' + log)
        sys.exit()

    do_mc = 'mc' in args['mc_data']
    do_data = 'data' in args['mc_data']

    base_folder = args['base_folder']
    mc_disk_files_filename = os.path.join(
        args['out_jsons_folder'],
        args['out_jsons_prefix'] + 'mc_disk_files.json')
    data_disk_files_filename = os.path.join(
        args['out_jsons_folder'],
        args['out_jsons_prefix'] + 'data_disk_files.json')

    if do_mc:
        # mc_disk_files[data_tier][aod_tag][year][mc_dir][filename] = {'file_events': int}
        mc_disk_files = make_mc_disk_files(base_folder)
        nested_dict.save_json_file(mc_disk_files, mc_disk_files_filename)

    if do_data:
        # data_disk_files[data_tier][aod_tag][year][data_dir][filename] = {'file_events': int}
        data_disk_files = make_data_disk_files(base_folder)
        nested_dict.save_json_file(data_disk_files, data_disk_files_filename)
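
    # Hedged sketch (illustration only): the nested layout described in the
    # comments above, mc_disk_files[data_tier][aod_tag][year][mc_dir][filename]
    # = {'file_events': int}, populated with made-up values and then walked.
    mc_disk_files_example = {
        'nanoaod': {
            'NanoAODv5': {
                '2016': {
                    'TTJets': {
                        'file_1.root': {'file_events': 150000},
                        'file_2.root': {'file_events': 98000},
                    },
                },
            },
        },
    }
    for data_tier, aod_tags in mc_disk_files_example.items():
        for aod_tag, years in aod_tags.items():
            for year, mc_dirs in years.items():
                for mc_dir, files in mc_dirs.items():
                    events = sum(f['file_events'] for f in files.values())
                    print(data_tier, aod_tag, year, mc_dir, events)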
Example #8
0
                if len(key_value_split) < 2:
                    print('[Warning]: Cannot add the line below to global_key.')
                    print('  ' + line)
                    continue
                key = key_value_split[0].strip()
                value = ':'.join(key_value_split[1:]).strip()
                json_value = None
                try:
                    json_value = json.loads(value)
                    nested_dict.convert_to_ascii(json_value)
                    value = json_value
                except ValueError:
                    pass
                if key in jobs_info[0]:
                    if value != jobs_info[0][key]:
                        print('[Warning]: global_key: ' + key +
                              ' is different. ' + str(jobs_info[0][key]) +
                              ' ' + str(value))
                jobs_info[0][key] = value
                if json_value is not None:
                    print('Setting job_info[0][' + key + '] to ' +
                          repr(value) + ' as a json')
                else:
                    print('Setting job_info[0][' + key + '] to ' + str(value) +
                          ' as a string')
            # Parse commands
            elif not line.startswith('#'):
                jobs_info.append({'command': line.rstrip()})

    nested_dict.save_json_file(jobs_info, args['jobs_info_filename'])
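
    # Hedged sketch (illustration only): the 'key: value' parsing technique
    # used above, where the value is stored as parsed JSON when json.loads
    # succeeds and as a plain string otherwise. The sample lines are made up.
    import json

    def parse_global_key(line):
        key_value_split = line.split(':')
        if len(key_value_split) < 2:
            return None
        key = key_value_split[0].strip()
        value = ':'.join(key_value_split[1:]).strip()
        try:
            value = json.loads(value)  # e.g. lists, numbers, booleans
        except ValueError:
            pass  # keep the raw string
        return key, value

    print(parse_global_key('ignore_keys: ["job_id", "job_status"]'))
    print(parse_global_key('command_script: ./job_script.py'))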
Example #9
0
            [mc_dataset_common_names_filename, ['2016', '2017', '2018']],
            [mc_dataset_2016_names_filename, ['2016']],
            [mc_dataset_2017_names_filename, ['2017']],
            [mc_dataset_2018_names_filename, ['2018']],
        ])
        #print(mc_dataset_names)

        #print ('dataset_names:', mc_dataset_names)
        # Ex) tag_meta[2016] = RunIISummer16, MiniAODv3, NanoAODv5
        mc_tag_meta = datasets.parse_mc_tag_meta(mc_tag_meta_filename)

    if make_data_datasets:
        # Ex) data_tag_meta[2016][B][MET][miniaod] = 17Jul2018
        data_tag_meta = datasets.parse_data_tag_meta(data_tag_meta_filename)

    if make_mc_datasets:
        mc_datasets = nested_dict.load_json_file(mc_datasets_filename)
        mc_datasets_update = datasets.update_mc_datasets(
            mc_dataset_names, mc_tag_meta, data_tiers, mc_datasets)
        #datasets.print_path_mc_datasets(mc_datasets)
        #datasets.print_path_mc_datasets(mc_datasets_update)
        nested_dict.save_json_file(mc_datasets_update,
                                   out_update_mc_datasets_filename)

    if make_data_datasets:
        data_datasets = nested_dict.load_json_file(data_datasets_filename)
        data_datasets_update = datasets.update_data_datasets(
            data_tag_meta, data_tiers, data_datasets)
        nested_dict.save_json_file(data_datasets_update,
                                   out_update_data_datasets_filename)
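
    # Hedged sketch (illustration only): the tag_meta shapes suggested by the
    # "Ex)" comments above, with made-up tags. mc_tag_meta maps a year to its
    # campaign and AOD tags; data_tag_meta is keyed by year, run group,
    # stream, and data tier.
    mc_tag_meta_example = {
        '2016': ['RunIISummer16', 'MiniAODv3', 'NanoAODv5'],
    }
    data_tag_meta_example = {
        '2016': {'B': {'MET': {'miniaod': '17Jul2018'}}},
    }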
Example #10
0
    parser.add_argument('jobs_info_filename', metavar='jobs_info.json')
    parser.add_argument('-o',
                        '--output_json',
                        metavar='submitted_jobs_info.json',
                        nargs=1)
    parser.add_argument('-n', '--node', metavar='"node_name"', nargs=1)
    args = vars(parser.parse_args())

    initialize_arguments(args)
    valid, log = are_arguments_valid(args)
    if not valid:
        print('[Error] ' + log)
        sys.exit()

    #jobs_info_filename = 'jsons/test_mc_jobs_info.json'
    #output_json = 'jsons/submitted_test_mc_jobs_info.json'
    #node = 'cms1'

    jobs_info_filename = args['jobs_info_filename']
    output_json = args['output_json']
    node = args['node']

    # jobs_info = [{'command_script':command_script, 'other_global_key':other_global_key},{'key_for_job':key_for_job},{'key_for_job':key_for_job},...]
    jobs_info = nested_dict.load_json_file(jobs_info_filename)
    queue = ucsb_queue.ucsb_queue()
    # statuses: [status], where status = 'submitted', 'done', 'fail', 'success', 'to_submit'
    node, number_combined_commands, print_or_run = queue.submit_jobs_info(
        jobs_info, node=node)

    if print_or_run == 'r': nested_dict.save_json_file(jobs_info, output_json)