Example #1
import os
import sys
import traceback
from math import log
from time import sleep
from common.external import call
#logpatch, a stream adapter that forwards writes onto logger_queue, is assumed
#to be importable from the analysis framework alongside call

def call_analyze_slice_condor(
	module_name,
	analysis_name,
	tree,
	grl,
	files,
	ranges,
	process_number,
	directory,
	result_queue,
	error_queue,
	logger_queue,
	keep,
	):
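	#runs one slice of the analysis as a condor job and reports the result,
	#any error, and all log output back through the three queues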

	analysis_framework = os.getenv('ANALYSISFRAMEWORK')
	analysis_home = os.getenv('ANALYSISHOME')

	sys.stdout = logpatch(logger_queue,'Process number {0}: '.format(process_number),'')
	sys.stderr = logpatch(logger_queue,'Process number {0}: '.format(process_number),'')

	num_processes = len(ranges)
	if num_processes > 1:
		#zero-pad the subdirectory suffix to the width of the largest process number
		width = int(log(num_processes-1,10))+1
		condor_dir = '{0}/condor_{1:0>{2}}'.format(directory,process_number,width)
	else:
		condor_dir = '{0}/condor'.format(directory)

	os.mkdir(condor_dir)
	os.chdir(condor_dir)

	output_name = os.path.abspath('output.root')

	#cleanup function, called on every exit path
	def cleanup(logger_text,error_text,output_name):
		#output_name is reported as None if there was a problem
		if logger_text: print logger_text
		if error_text:
			output_name = None
			error = error_text
		else: error = None
		result_queue.put(output_name)
		#error is None if there was NO problem
		if error: error_queue.put(error)
		sys.exit()

	files_text = 'files.text'
	with open(files_text,'w') as f:
		for file_ in files: f.write(file_+'\n')
		
	#the condor job writes these files as it runs; the monitor loops below
	#poll the error and logger files to mirror its output in real time
	result_file_name = 'result.out'
	error_file_name = 'error.out'
	logger_file_name = 'logger.out'

	#set up the condor submit and executable files from the framework templates
	with open('{0}/condor/default_condor.submit'.format(analysis_framework)) as f:
		with open('condor.submit','w') as f_out:
			f_out.write(f.read())

	with open('{0}/condor/default_condor_executable.sh'.format(analysis_framework)) as f:
		with open('condor_executable.sh','w') as f_out:
			f_out.write(f.read().format(
				analysis_framework = analysis_framework,
				analysis_home = analysis_home,
				module_name = module_name,
				analysis_name = analysis_name,
				tree = tree,
				grl = grl,
				files = files_text,
				keep = keep,
				start = ranges[process_number][0],
				end = ranges[process_number][1],
				output_name = output_name,
				process_number = process_number,
				error_file_name = error_file_name,
				logger_file_name = logger_file_name,	
				))
	
	#submit condor job
	try:
		print call('condor_submit condor.submit').strip()
	except Exception:
		error_text = 'Error occurred in initialization\n'+traceback.format_exc()
		cleanup('',error_text,output_name)
		
	#attach to monitoring files
	error_file = None
	logger_file = None
	while True:
		if not error_file and os.path.exists(error_file_name): error_file = open(error_file_name,'r+')
		if not logger_file and os.path.exists(logger_file_name): logger_file = open(logger_file_name,'r+')
		if error_file and logger_file: break
		if os.path.exists('done'):
			cleanup('','An unknown error prevented log file creation before the job finished.',output_name)
		sleep(1)

	#monitor job
	while not os.path.exists('done'):
		logger_text = logger_file.read()
		if logger_text: print logger_text.strip()
		error_text = error_file.read()
		#flush any log output that arrived after the read above, then report the error
		if error_text: cleanup(logger_file.read(),error_text,output_name)
		sleep(0.5)

	cleanup(logger_file.read(),error_file.read(),output_name)
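
The function above is a worker meant to run in a child process, with everything reported back through the three queues. A minimal sketch of a driver, assuming placeholder module/analysis names, file lists, and event ranges; only the argument order comes from the signature above:

import os
import multiprocessing

#hypothetical inputs; only the argument order is taken from the worker above
files = ['skim_01.root','skim_02.root']
ranges = [(0,1000),(1000,2000)]          #one (start,end) slice per process
result_queue = multiprocessing.Queue()   #worker puts output_name, or None on failure
error_queue = multiprocessing.Queue()    #worker puts a traceback string on failure
logger_queue = multiprocessing.Queue()   #worker redirects stdout/stderr here

processes = [
	multiprocessing.Process(
		target=call_analyze_slice_condor,
		args=('my_module','MyAnalysis','physics',None,files,ranges,
			i,os.getcwd(),result_queue,error_queue,logger_queue,False),
		)
	for i in range(len(ranges))
	]
for process in processes: process.start()
results = [result_queue.get() for process in processes]
for process in processes: process.join()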
Example #2
def call_grid(
    module_name,
    analysis_name,
    grid_jsons,
    tree='physics',
    grl=None,
    num_processes=1,
    keep=False,
    merge=False,
    jobsize=1,
    help=False,
    ):
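    #submits one prun job per output dataset described in the grid JSON files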

    import os
    import sys
    import atexit
    import shutil
    from time import sleep, time
    import string
    import random
    from common.external import call
    import tarfile
    import json
    import itertools

    #parse all the JSON first: we change directory below, and this surfaces malformed files early
    grid_datas = []
    for grid_json in grid_jsons:
        with open(grid_json) as f: grid_datas.append(json.load(f))

    analysis_home = os.getenv('ANALYSISHOME')
    analysis_framework = os.getenv('ANALYSISFRAMEWORK')

    #look up the analysis class by name in the given module
    analysis_constructor = __import__(module_name,globals(),locals(),[analysis_name]).__dict__[analysis_name]

    analysis_instance = analysis_constructor()
    if help: sys.exit(2)

    while True:
        directory = '/tmp/'+''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10))
        try: os.mkdir(directory)
        except OSError as error:
            #errno 17 (EEXIST): the random name already exists, so try another
            if error.errno != 17: raise
            continue
        break
    print 'Created temporary directory {0}'.format(directory)

    cwd = os.getcwd()
    atexit.register(os.chdir,cwd)
    atexit.register(shutil.rmtree,os.path.abspath(directory))
    os.chdir(directory)

    #create tarball of working directory
    print 'Creating tarball'
    tarball = tarfile.open('send.tar.gz','w:gz')

    os.chdir(analysis_framework+'/../')
    tarball.add(os.path.basename(analysis_framework))
    os.chdir(analysis_home+'/../')
    tarball.add(os.path.basename(analysis_home))
    tarball.close()

    os.chdir(directory)

    exclude_sites = [
        'ANALY_INFN-NAPOLI',
        'ANALY_ARC',
        'ANALY_INFN-FRASCATI',
        'ANALY_FZK_SHORT',
        #'ANALY_MWT2_SL6',
        ]

    args = []

    #collect the tail of sys.argv from the first bare '-' onward (separators
    #included) so any extra options are forwarded to analyze.py on the grid
    for i,(k,g) in enumerate(itertools.groupby(sys.argv,lambda x:x=='-')):
        if not i: continue
        args += list(g)

    for grid_data in grid_datas:

        #the per-JSON GRL overrides the grl argument (None when the JSON has none)
        grl = grid_data.get('GRL')

        #command run on the grid worker: rebuild the environment, then run
        #analyze.py over the job's input files (%IN is expanded by prun)
        grid_command = 'unset tmp; unset tmpdir; source analysis-framework/setup.sh; source {analysis_home}/setup.sh; analyze.py -m {module} -a {analysis} -i \`echo %IN | sed \'s/,/ /g\'\` -o skim.root -p {processes} -n {tree}{keep}{grl} {args}'.format(
            module=module_name,
            analysis=analysis_name,
            tree=tree,
            processes=num_processes,
            analysis_home=os.path.basename(analysis_home),
            keep=' --keep' if keep else '',
            grl = ' -g {0}'.format(' '.join(grl)) if grl else '',
            args = ' '.join(args)
            )
    
        #build step prun runs once per site (--bexec) before the analysis jobs
        make_command = 'unset tmp; unset tmpdir; source analysis-framework/setup.sh; source {analysis_home}/setup.sh; python {analysis_home}/make_externals.py'.format(
            analysis_home=os.path.basename(analysis_home),
            )

        #prun template; the per-dataset placeholders are filled in below
        prun_command = 'prun --bexec="{make_command}" --exec "{grid_command}" --rootVer="5.34.07" --cmtConfig="x86_64-slc5-gcc43-opt" --outputs="skim.root" --inDsTxt=input_datasets.txt --outDS={output_name} --inTarBall=send.tar.gz {jobsize}{exclude_sites}{ngb} --useContElementBoundary{merge}'

        for output_name,input_datasets in grid_data.get('datasets').items():

            if 'steering' in grid_data:
                output_name = output_name.format(**grid_data['steering'])

            with open('input_datasets.txt','w') as f:
                for input_dataset in input_datasets:
                    f.write(input_dataset+'\n')

            final_prun_command = prun_command.format(
                grid_command=grid_command,
                make_command=make_command,
                output_name=output_name,
                merge=' --mergeOutput' if merge else '',
                jobsize='--nFilesPerJob='+str(jobsize) if jobsize else '',
                exclude_sites=' --excludedSite="'+','.join(exclude_sites)+'"' if exclude_sites else '',
                ngb=' --nGBPerJob=MAX' if not jobsize else '',
                )

            print final_prun_command
            print call(final_prun_command,verbose=True).strip()
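
For reference, a sketch of the grid JSON layout this function appears to expect, reconstructed from the keys read above; 'GRL' and 'steering' are optional, and every value is a made-up placeholder:

import json

#hypothetical grid JSON; only the key names ('GRL', 'steering', 'datasets')
#come from the code above
grid_json = {
    'GRL': ['data12_8TeV.grl.xml'],                     #joined into the -g option
    'steering': {'period': 'B'},                        #formatted into output names
    'datasets': {
        'user.someone.period{period}.skim/': [          #output dataset name
            'data12_8TeV.periodB.physics_Muons.DAOD/',  #input datasets, one per line
            ],
        },
    }

with open('my_grid.json','w') as f:
    json.dump(grid_json,f,indent=4)

#call_grid('my_module','MyAnalysis',['my_grid.json'],jobsize=2,merge=True)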