def merge_files_by_process(root_files):
    """Merge per-process ROOT files into one output file per summed group.

    Every path in ``root_files`` is mapped to a process name with
    ``get_process_from_file``.  A file is added to each group below whose
    sample list contains that process name, and ``merge_ROOT_files`` is then
    called once per group with the collected files.

    The output path of a group is derived from the first input path: its
    process name is swapped for the group name and the module-level
    ``input_folder`` is swapped for ``output_folder``.

    Args:
        root_files: sequence of input ROOT file paths.
    """
    electron_qcd_samples = [
        'QCD_Pt-20to30_BCtoE',
        'QCD_Pt-30to80_BCtoE',
        'QCD_Pt-80to170_BCtoE',
        'QCD_Pt-20to30_EMEnriched',
        'QCD_Pt-30to80_EMEnriched',
        'QCD_Pt-80to170_EMEnriched',
        'GJets_HT-40To100',
        'GJets_HT-100To200',
        'GJets_HT-200',
    ]
    single_top_samples = [
        'T_tW-channel',
        'T_t-channel',
        'T_s-channel',
        'Tbar_tW-channel',
        'Tbar_t-channel',
        'Tbar_s-channel',
    ]
    wplusjets_samples = ['W1Jet', 'W2Jets', 'W3Jets', 'W4Jets']
    # Build a NEW list here.  Binding vplusjets_samples to the same list
    # object and appending to it would also put 'DYJetsToLL' into the
    # 'WPlusJets' group.
    vplusjets_samples = wplusjets_samples + ['DYJetsToLL']
    diboson_samples = ['WWtoAnything', 'WZtoAnything', 'ZZtoAnything']
    signal_samples = ['TTJet', 'SingleTop']

    summations = {
        'QCD_Electron': electron_qcd_samples,
        'SingleTop': single_top_samples,
        'WPlusJets': wplusjets_samples,
        'VPlusJets': vplusjets_samples,
        'DiBoson': diboson_samples,
        'Signal': signal_samples,
    }

    # Every group gets an entry, even when no input file matches it.
    summation_files = dict((summation, []) for summation in summations)
    file_template = ''
    template_token = '<temp>'
    for file_in_path in root_files:
        # Resolve the process name once per file rather than once per group.
        process_name = get_process_from_file(file_in_path)
        if not file_template:
            # The first file defines the naming pattern of all outputs.
            file_template = file_in_path.replace(process_name, template_token)
            file_template = file_template.replace(input_folder, output_folder)
        for summation, samples in summations.items():
            if process_name in samples:
                summation_files[summation].append(file_in_path)

    for summation, files in summation_files.items():
        output_file = file_template.replace(template_token, summation)
        merge_ROOT_files(files, output_file)
def merge_files_by_process(root_files):
    """Merge per-process ROOT files into one output file per summed group.

    Every path in ``root_files`` is mapped to a process name with
    ``get_process_from_file``.  A file is added to each group below whose
    sample list contains that process name, and ``merge_ROOT_files`` is then
    called once per group with the collected files.

    The output path of a group is derived from the first input path: its
    process name is swapped for the group name and the module-level
    ``input_folder`` is swapped for ``output_folder``.

    Args:
        root_files: sequence of input ROOT file paths.
    """
    electron_qcd_samples = [
        'QCD_Pt-20to30_BCtoE', 'QCD_Pt-30to80_BCtoE', 'QCD_Pt-80to170_BCtoE',
        'QCD_Pt-20to30_EMEnriched', 'QCD_Pt-30to80_EMEnriched',
        'QCD_Pt-80to170_EMEnriched', 'GJets_HT-40To100', 'GJets_HT-100To200',
        'GJets_HT-200'
    ]
    single_top_samples = [
        'T_tW-channel', 'T_t-channel', 'T_s-channel', 'Tbar_tW-channel',
        'Tbar_t-channel', 'Tbar_s-channel'
    ]
    wplusjets_samples = ['W1Jet', 'W2Jets', 'W3Jets', 'W4Jets']
    # Build a NEW list here.  Binding vplusjets_samples to the same list
    # object and appending to it would also put 'DYJetsToLL' into the
    # 'WPlusJets' group.
    vplusjets_samples = wplusjets_samples + ['DYJetsToLL']
    diboson_samples = ['WWtoAnything', 'WZtoAnything', 'ZZtoAnything']
    signal_samples = ['TTJet', 'SingleTop']

    summations = {
        'QCD_Electron': electron_qcd_samples,
        'SingleTop': single_top_samples,
        'WPlusJets': wplusjets_samples,
        'VPlusJets': vplusjets_samples,
        'DiBoson': diboson_samples,
        'Signal': signal_samples
    }

    # Every group gets an entry, even when no input file matches it.
    summation_files = dict((summation, []) for summation in summations)
    file_template = ''
    template_token = '<temp>'
    for file_in_path in root_files:
        # Resolve the process name once per file rather than once per group.
        process_name = get_process_from_file(file_in_path)
        if not file_template:
            # The first file defines the naming pattern of all outputs.
            file_template = file_in_path.replace(process_name,
                                                 template_token)
            file_template = file_template.replace(input_folder,
                                                  output_folder)
        for summation, samples in summations.items():
            if process_name in samples:
                summation_files[summation].append(file_in_path)

    for summation, files in summation_files.items():
        output_file = file_template.replace(template_token, summation)
        merge_ROOT_files(files, output_file)
        continue
    print "Merging"
    if 'unfolding' in sample:
#	print 'unfolding in sample'
        output_file = measurement_config.unfolding_output_general_template % sample
	input_files = [measurement_config.unfolding_input_templates[sample] % input_sample for input_sample in input_samples]
    else: #if any (generator_systematic in sample for generator_systematic in measurement_config.generator_systematics):
#        print 'generator systematic in sample'
	output_file = measurement_config.central_general_template % sample
        input_files = [measurement_config.central_general_template % input_sample for input_sample in input_samples]

    print output_file
    for input_file in input_files:
        print input_file
    if not os.path.exists(output_file):
        merge_ROOT_files(input_files, output_file, compression = 7)
        new_files.append(output_file)
    print '='*120

# Merge all other histogram files: for every category, build one merged file
# per selected summed sample.
for category in measurement_config.categories_and_prefixes.keys():
    for sample, input_samples in sample_summations.iteritems():
        # Only the summed samples listed here are merged; all others are skipped.
        if not sample in ['VJets', 'QCD_Muon', 'SingleTop']: #
            continue
        print "Merging"
        # Output and input paths come from the same per-category template,
        # filled with the summed sample name and each input sample name.
        output_file = measurement_config.general_category_templates[category] % sample
        print output_file
        input_files = [measurement_config.general_category_templates[category] % input_sample for input_sample in input_samples]
        for input_file in input_files:
            print input_file
        # NOTE(review): the body of the following check is missing at this
        # point of the file -- confirm what should run when the output file
        # does not exist yet.
        if not os.path.exists(output_file):
# Make folder
make_folder_if_not_exists(path_to_AN_folder + "/" + category)

current_working_directory = os.getcwd()  #find current working directory
output_file_hdfs = config.general_category_templates[category] % sample
output_file = output_file_hdfs.replace(
    "/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [
    config.general_category_templates[category] % input_sample
    for input_sample in input_samples
]

if not os.path.exists(output_file):
    merge_ROOT_files(input_files,
                     output_file,
                     compression=7,
                     waitToFinish=True)
    print "merging ", sample
else:
    print 'Not merging ', sample, 'as', output_file, 'already exists'

# Now move output file to hdfs
# Check if file already exists on hdfs
if os.path.exists(output_file_hdfs):
    print "Output file on hdfs already exists.  Removing and replacing with new version."
    command = 'hadoop fs -rm -skipTrash ' + output_file_hdfs.split('/hdfs')[-1]
    p = subprocess.Popen(command, shell=True)
    p.wait()

print '\nStarting rsync'
output_log_file = output_file.replace(".root", ".log")
category = job[0]
sample = job[1]
input_samples = job[2]

# print 'Test with :',sample, category, input_samples

# Make folder
make_folder_if_not_exists( path_to_AN_folder + "/" + category)

current_working_directory = os.getcwd()  #find current working directory
output_file_hdfs = config.general_category_templates[category] % sample
output_file = output_file_hdfs.replace("/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [config.general_category_templates[category] % input_sample for input_sample in input_samples]

if not os.path.exists( output_file ):
    merge_ROOT_files( input_files, output_file, compression = 7, waitToFinish=True )
    print "merging ", sample
else :
    print 'Not merging ',sample,'as',output_file,'already exists'

# Now move output file to hdfs
# Check if file already exists on hdfs
if os.path.exists( output_file_hdfs ):
  print "Output file on hdfs already exists.  Removing and replacing with new version."
  command = 'hadoop fs -rm -skipTrash ' + output_file_hdfs.split('/hdfs')[-1]
  p = subprocess.Popen(command, shell=True)
  p.wait()

print '\nStarting rsync'
output_log_file = output_file.replace(".root", ".log")
command = 'rsync --verbose  --progress --stats --compress --recursive --times --update %s %s >> %s' % (output_file,output_file_hdfs,output_log_file)
    current_working_directory = os.getcwd()  #find current working directory
    output_file = config_8TeV.central_general_template % sample
    output_file = output_file.replace(
        "/hdfs/TopQuarkGroup/results/histogramfiles",
        current_working_directory)
    input_files = [
        config_8TeV.central_general_template % input_sample
        for input_sample in input_samples
    ]

    print output_file
    for input_file in input_files:
        print input_file

    if not os.path.exists(output_file):
        merge_ROOT_files(input_files, output_file, compression=7)
        print "merging ", sample
        new_files.append(output_file)
    print '=' * 120

    # if 8 concurrent processes, wait until they are finished before starting the next set to avoid overloading the machine
    while (int(
            subprocess.check_output("ps ax | grep 'hadd' | wc -l", shell=True))
           - 2) >= 8:
        time.sleep(30)  # sleep for 30 seconds

# merge all other histogram files
for category in config_8TeV.categories_and_prefixes.keys():
    for sample, input_samples in sample_summations.iteritems():
        if not sample in ['QCD_Electron', 'QCD_Muon', 'VJets', 'SingleTop']:  #
            continue