def create_log(job, logfile, tarball_name):
    """
    Create the tarball for the job log.

    :param job: job object.
    :param logfile: log file object (file spec with an lfn attribute).
    :param tarball_name: name of the tarball to create (string).
    :raises LogFileCreationFailure: in case of log file creation problem.
    :return:
    """
    log = get_logger(job.jobid)
    log.debug('preparing to create log file')

    # perform special cleanup (user specific) prior to log file creation
    pilot_user = os.environ.get('PILOT_USER', 'generic').lower()
    user = __import__('pilot.user.%s.common' % pilot_user, globals(), locals(), [pilot_user], 0)  # Python 2/3
    user.remove_redundant_files(job.workdir)

    input_files = [e.lfn for e in job.indata]
    output_files = [e.lfn for e in job.outdata]

    # remove any present input/output files before tarring up workdir
    for fname in input_files + output_files:
        path = os.path.join(job.workdir, fname)
        if os.path.exists(path):
            log.info('removing file: %s' % path)
            remove(path)

    # rename the workdir for the tarball creation
    newworkdir = os.path.join(os.path.dirname(job.workdir), tarball_name)
    orgworkdir = job.workdir
    log.debug('renaming %s to %s' % (job.workdir, newworkdir))
    os.rename(job.workdir, newworkdir)
    job.workdir = newworkdir

    fullpath = os.path.join(job.workdir, logfile.lfn)  # /some/path/to/dirname/log.tgz
    log.info('will create archive %s' % fullpath)
    try:
        cmd = "pwd;tar cvfz %s %s --dereference --one-file-system; echo $?" % (fullpath, tarball_name)
        _, stdout, _ = execute(cmd)
    except Exception as error:
        raise LogFileCreationFailure(error)
    else:
        log.debug('stdout = %s' % stdout)

    # restore the original workdir name
    log.debug('renaming %s back to %s' % (job.workdir, orgworkdir))
    try:
        os.rename(job.workdir, orgworkdir)
    except Exception as error:
        log.debug('exception caught: %s' % error)
    job.workdir = orgworkdir
def create_log(workdir, logfile_name, tarball_name, cleanup, input_files=None, output_files=None, is_looping=False, debugmode=False):
    """
    Create the tarball for the job.

    :param workdir: work directory for the job (string).
    :param logfile_name: log file name (string).
    :param tarball_name: tarball name (string).
    :param cleanup: perform cleanup (Boolean).
    :param input_files: list of input files to remove (list).
    :param output_files: list of output files to remove (list).
    :param is_looping: True for looping jobs, False by default (Boolean).
    :param debugmode: True if debug mode has been switched on (Boolean).
    :raises LogFileCreationFailure: in case of log file creation problem.
    :return:
    """
    input_files = input_files or []
    output_files = output_files or []
    logger.debug('preparing to create log file (debug mode=%s)', str(debugmode))

    # PILOT_HOME is the launch directory of the pilot (or the one specified in pilot options as pilot workdir)
    pilot_home = os.environ.get('PILOT_HOME', os.getcwd())
    current_dir = os.getcwd()
    if pilot_home != current_dir:
        os.chdir(pilot_home)

    # perform special cleanup (user specific) prior to log file creation
    if cleanup:
        pilot_user = os.environ.get('PILOT_USER', 'generic').lower()
        user = __import__('pilot.user.%s.common' % pilot_user, globals(), locals(), [pilot_user], 0)  # Python 2/3
        user.remove_redundant_files(workdir, islooping=is_looping, debugmode=debugmode)

    # remove any present input/output files before tarring up workdir
    for fname in input_files + output_files:
        path = os.path.join(workdir, fname)
        if os.path.exists(path):
            logger.info('removing file: %s', path)
            remove(path)

    # rename the workdir for the tarball creation
    newworkdir = os.path.join(os.path.dirname(workdir), tarball_name)
    orgworkdir = workdir
    os.rename(workdir, newworkdir)
    workdir = newworkdir

    fullpath = os.path.join(workdir, logfile_name)  # /some/path/to/dirname/log.tgz
    logger.info('will create archive %s', fullpath)
    try:
        cmd = "pwd;tar cvfz %s %s --dereference --one-file-system; echo $?" % (fullpath, tarball_name)
        _, stdout, _ = execute(cmd)
    except Exception as error:
        raise LogFileCreationFailure(error)
    else:
        if pilot_home != current_dir:
            os.chdir(current_dir)  # return to the original directory
        logger.debug('stdout = %s', stdout)

    # restore the original workdir name
    try:
        os.rename(workdir, orgworkdir)
    except Exception as error:
        logger.debug('exception caught: %s', error)
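# A minimal usage sketch for the parameterized create_log() above; the paths and
# file names are hypothetical, and execute(), remove() and LogFileCreationFailure
# are assumed to come from the pilot codebase as in the surrounding functions.
def create_log_example():
    workdir = '/scratch/PanDA_Pilot-1234567890'    # hypothetical job work directory
    try:
        create_log(workdir,
                   'log.tgz',                      # logfile_name
                   'tarball_PandaJob_1234567890',  # tarball_name (workdir is renamed to this)
                   cleanup=True,                   # run user-specific cleanup first
                   input_files=['EVNT.01234567._000001.pool.root.1'],
                   output_files=['AOD.01234567._000001.pool.root.1'],
                   is_looping=False,
                   debugmode=False)
    except LogFileCreationFailure as error:
        logger.warning('log file creation failed: %s', error)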
def perform_initial_payload_error_analysis(job, exit_code):
    """
    Perform an initial analysis of the payload exit code and stderr.

    Singularity errors are caught here.

    :param job: job object.
    :param exit_code: exit code from payload execution (int).
    :return:
    """
    log = get_logger(job.jobid, logger)

    if exit_code != 0:
        msg = ""
        ec = 0
        log.warning('main payload execution returned non-zero exit code: %d' % exit_code)

        stderr = read_file(os.path.join(job.workdir, config.Payload.payloadstderr))
        if stderr != "":
            msg = errors.extract_stderr_error(stderr)
            if msg == "":
                # look for warning messages instead (might not be fatal so do not set UNRECOGNIZEDTRFSTDERR)
                msg = errors.extract_stderr_warning(stderr)
                fatal = False
            else:
                fatal = True
            if msg != "":
                log.warning("extracted message from stderr:\n%s" % msg)
                ec = set_error_code_from_stderr(msg, fatal)

        if not ec:
            ec = errors.resolve_transform_error(exit_code, stderr)
        if ec != 0:
            if msg:
                msg = errors.format_diagnostics(ec, msg)
            job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(ec, msg=msg)
        else:
            if job.piloterrorcodes:
                log.warning('error code(s) already set: %s' % str(job.piloterrorcodes))
            else:
                if os.path.exists(os.path.join(job.workdir, "core")):
                    log.warning("detected a core dump file (will be removed)")
                    remove(os.path.join(job.workdir, "core"))
                    job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(errors.COREDUMP)
                else:
                    log.warning('initial error analysis did not resolve the issue')
    else:
        log.info('main payload execution returned zero exit code, but will check it more carefully')
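# Hedged sketch of how the analysis above is typically driven: once the payload
# subprocess has terminated, its exit code is handed to
# perform_initial_payload_error_analysis(). 'proc' is assumed to be the
# subprocess.Popen object that ran the payload; this wrapper is illustrative
# and not part of the pilot API.
def analyze_finished_payload(job, proc):
    exit_code = proc.poll()  # the payload has terminated, so poll() returns its exit code
    perform_initial_payload_error_analysis(job, exit_code)
    return exit_code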
def precleanup():
    """
    Pre-cleanup at the beginning of the job to remove any pre-existing files
    from previous jobs in the main work dir.

    :return:
    """
    logger.debug('performing pre-cleanup of potentially pre-existing files from earlier job in main work dir')
    path = os.path.join(os.environ.get('PILOT_HOME', os.getcwd()), get_memory_monitor_summary_filename())
    if os.path.exists(path):
        logger.info('removing no longer needed file: %s' % path)
        remove(path)
def postprocess_workdir(workdir):
    """
    Post-processing of working directory. Unlink paths.

    :param workdir: path to directory to be processed (string).
    :raises FileHandlingFailure: in case of IOError.
    """
    pseudo_dir = "poolcond"
    try:
        if os.path.exists(os.path.join(workdir, pseudo_dir)):
            remove(os.path.join(workdir, pseudo_dir))
    except IOError:
        raise FileHandlingFailure("post-processing of working directory failed")
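# The remove() helper used throughout these functions is not shown here; judging
# from remove_job_request_file() below, it is expected to return 0 on success.
# A minimal sketch consistent with that assumed contract:
import errno
import shutil

def remove_sketch(path):
    """Remove a file or directory tree; return 0 on success, -1 on failure."""
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)  # directories are removed recursively
        else:
            os.remove(path)
    except OSError as error:
        if error.errno != errno.ENOENT:  # a missing path is treated as success
            return -1
    return 0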
def remove_job_request_file():
    """
    Remove an old job request file when it is no longer needed.

    :return:
    """
    path = get_job_request_file_name()
    if exists(path):
        if remove(path) == 0:
            logger.info('removed %s' % path)
    else:
        logger.debug('there is no job request file')
def create_log(job, logfile, tarball_name, args):
    """
    Create the tarball for the job log.

    :param job: job object.
    :param logfile: log file object (file spec with an lfn attribute).
    :param tarball_name: name of the tarball to create (string).
    :param args: pilot arguments object (contains the cleanup flag).
    :raises LogFileCreationFailure: in case of log file creation problem.
    :return:
    """
    logger.debug('preparing to create log file')

    # PILOT_HOME is the launch directory of the pilot (or the one specified in pilot options as pilot workdir)
    pilot_home = os.environ.get('PILOT_HOME', os.getcwd())
    current_dir = os.getcwd()
    if pilot_home != current_dir:
        logger.debug('cd from %s to %s for log creation' % (current_dir, pilot_home))
        os.chdir(pilot_home)

    # perform special cleanup (user specific) prior to log file creation
    if args.cleanup:
        pilot_user = os.environ.get('PILOT_USER', 'generic').lower()
        user = __import__('pilot.user.%s.common' % pilot_user, globals(), locals(), [pilot_user], 0)  # Python 2/3
        user.remove_redundant_files(job.workdir, islooping=errors.LOOPINGJOB in job.piloterrorcodes)
    else:
        logger.debug('user specific cleanup not performed')

    input_files = [e.lfn for e in job.indata]
    output_files = [e.lfn for e in job.outdata]

    # remove any present input/output files before tarring up workdir
    for fname in input_files + output_files:
        path = os.path.join(job.workdir, fname)
        if os.path.exists(path):
            logger.info('removing file: %s' % path)
            remove(path)

    # rename the workdir for the tarball creation
    newworkdir = os.path.join(os.path.dirname(job.workdir), tarball_name)
    orgworkdir = job.workdir
    logger.debug('renaming %s to %s' % (job.workdir, newworkdir))
    os.rename(job.workdir, newworkdir)
    job.workdir = newworkdir

    fullpath = os.path.join(job.workdir, logfile.lfn)  # /some/path/to/dirname/log.tgz
    logger.info('will create archive %s' % fullpath)
    try:
        cmd = "pwd;tar cvfz %s %s --dereference --one-file-system; echo $?" % (fullpath, tarball_name)
        _, stdout, _ = execute(cmd)
    except Exception as error:
        raise LogFileCreationFailure(error)
    else:
        if pilot_home != current_dir:
            logger.debug('cd from %s to %s after log creation' % (pilot_home, current_dir))
            os.chdir(current_dir)
        logger.debug('stdout = %s' % stdout)

    # verify the size of the log file
    size = get_local_file_size(fullpath)
    if size < 1024:
        logger.warning('log file size too small: %d B' % size)
    else:
        logger.info('log file size: %d B' % size)

    # restore the original workdir name
    logger.debug('renaming %s back to %s' % (job.workdir, orgworkdir))
    try:
        os.rename(job.workdir, orgworkdir)
    except Exception as error:
        logger.debug('exception caught: %s' % error)
    job.workdir = orgworkdir
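# Alternative tarball creation using the standard library instead of shelling out
# to tar; adapted from code that was previously commented out inside create_log().
# dereference=True makes tarfile follow symlinks, mirroring 'tar --dereference'.
# As in create_log(), the current working directory is assumed to be the parent
# of the renamed work directory, so the archive stores relative paths. This is a
# sketch, not the pilot's production path.
import tarfile
from contextlib import closing

def create_log_tarball(fullpath, workdir):
    """Create a gzipped tarball of the (renamed) work directory at fullpath."""
    with closing(tarfile.open(name=fullpath, mode='w:gz', dereference=True)) as archive:
        archive.add(os.path.basename(workdir), recursive=True)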