def failed_restart(
    unique_resource_name,
    exe,
    job_id,
    configuration,
):
    """Tell grid_script that restarting the given exe on the resource failed.

    Returns a (bool status, str msg) tuple.
    """

    notification = 'RESTARTEXEFAILED %s %s %s\n' % (unique_resource_name,
                                                    exe, job_id)
    delivered = send_message_to_grid_script(notification,
                                            configuration.logger,
                                            configuration)
    if delivered:
        return (True, 'Notified server about failed restart')
    return (False, 'Fatal error: Could not write message to grid_script')
def failed_restart( unique_resource_name, exe, job_id, configuration, ): """Helper for notifying grid_script when a exe restart failed""" # returns a tuple (bool status, str msg) send_message = 'RESTARTEXEFAILED %s %s %s\n'\ % (unique_resource_name, exe, job_id) status = send_message_to_grid_script(send_message, configuration.logger, configuration) if not status: return (False, 'Fatal error: Could not write message to grid_script') return (True, 'Notified server about failed restart')
def finished_job(
    session_id,
    unique_resource_name,
    exe,
    job_id,
    configuration,
):
    """Tell grid_script that the given job finished on the resource.

    Returns a (bool status, str msg) tuple.
    """

    notification = 'RESOURCEFINISHEDJOB %s %s %s %s\n'\
        % (unique_resource_name, exe, session_id, job_id)
    delivered = send_message_to_grid_script(notification,
                                            configuration.logger,
                                            configuration)
    if delivered:
        return (True, 'Notified server about finished job')
    return (False, 'Fatal error: Could not write message to grid_script')
def finished_job( session_id, unique_resource_name, exe, job_id, configuration, ): """Helper for notifying grid_script when a job finishes""" # returns a tuple (bool status, str msg) send_message = 'RESOURCEFINISHEDJOB %s %s %s %s\n'\ % (unique_resource_name, exe, session_id, job_id) status = send_message_to_grid_script(send_message, configuration.logger, configuration) if not status: return (False, 'Fatal error: Could not write message to grid_script') return (True, 'Notified server about finished job')
def migrated_job(filename, client_id, configuration):
    """Enqueue a job file received through server-to-server migration.

    Loads the pickled job from server_home, saves it among the user's other
    mRSL files and notifies grid_script so the job enters the queue.

    Returns a tuple (bool status, str msg).
    """

    logger = configuration.logger
    client_dir = client_id_dir(client_id)
    job_path = os.path.abspath(
        os.path.join(configuration.server_home, client_dir, filename))

    # unpickle and enqueue received job file
    # escape already-escaped spaces so the path survives further quoting
    job_path_spaces = job_path.replace('\\ ', '\\\\\\ ')
    job = io.unpickle(job_path_spaces, configuration.logger)

    # TODO: update any fields to mark migration?

    if not job:
        return (False,
                'Fatal migration error: loading pickled job (%s) failed! ' %
                job_path_spaces)

    job_id = job['JOB_ID']

    # save file with other mRSL files

    mrsl_filename = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                                     client_dir, job_id + '.mRSL'))
    if not io.pickle(job, mrsl_filename, configuration.logger):
        return (False, 'Fatal error: Could not write ' + filename)

    # tell 'grid_script'

    message = 'SERVERJOBFILE ' + client_dir + '/' + job_id + '\n'
    if not io.send_message_to_grid_script(message, logger, configuration):
        return (False, 'Fatal error: Could not write to grid stdin')

    # TODO: do we need to wait for grid_script to ack job reception?
    # ... same question applies to new_job, btw.

    # Fixed misspelling 'succesfully' in the user-facing message
    return (True, '%s successfully migrated.' % job_id)
# NOTE(review): fragment — interior of a job submission helper; the enclosing
# def is not visible in this chunk, so names like global_dict, outfile and
# forceddestination are presumably parameters/locals of that function.

# Persist the job dictionary before involving grid_script
if not pickle(global_dict, filename, logger):
    return (False, 'Fatal error: Could not write %s' % filename)

if not outfile == 'AUTOMATIC':

    # an outfile was specified, so this is just for testing - dont tell
    # grid_script

    return (True, '')

# tell 'grid_script'

message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)
if not send_message_to_grid_script(message, logger, configuration):
    return (False,
            '''Fatal error: Could not get exclusive access or write to %s'''
            % configuration.grid_stdin)

if forceddestination and forceddestination.has_key('RE_NAME'):

    # add job_id to runtime environment verification history

    unique_resource_name = forceddestination['UNIQUE_RESOURCE_NAME']
    re_name = forceddestination['RE_NAME']
    resource_config_filename = configuration.resource_home\
        + unique_resource_name + '/config'

    # open resource config
def main(client_id, user_arguments_dict):
    """Main function used by front end.

    Matches the caller's own mRSL job files against the supplied job_id
    patterns and asks grid_script to save schedule information for each
    matching queued/pending job.  Returns the usual
    (output_objects, returnvalue) tuple.
    """

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']

    if not safe_handler(configuration, 'post', op_name, client_id,
                        get_csrf_limit(configuration), accepted):
        output_objects.append({
            'object_type': 'error_text', 'text':
            '''Only accepting CSRF-filtered POST requests to prevent unintended updates'''
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not configuration.site_enable_jobs:
        output_objects.append({
            'object_type': 'error_text', 'text':
            '''Job execution is not enabled on this system'''
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                                     client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, abs_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type': 'error_text', 'text':
                '%s: You do not have any matching job IDs!' % pattern
            })
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job schedule is hard on the server, limit

    if len(filelist) > 100:
        output_objects.append({
            'object_type': 'error_text', 'text':
            'Too many matching jobs (%s)!' % len(filelist)
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    saveschedulejobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')
        saveschedulejob = {'object_type': 'saveschedulejob',
                           'job_id': job_id}

        # Renamed local from 'dict' to job_dict to stop shadowing the builtin
        job_dict = unpickle(filepath, logger)
        if not job_dict:
            saveschedulejob['message'] = \
                ('The file containing the information' \
                 ' for job id %s could not be opened!' \
                 ' You can only read schedule for ' \
                 'your own jobs!') % job_id
            saveschedulejobs.append(saveschedulejob)
            status = returnvalues.CLIENT_ERROR
            continue

        saveschedulejob['oldstatus'] = job_dict['STATUS']

        # Is the job status pending?

        possible_schedule_states = ['QUEUED', 'RETRY', 'FROZEN']
        if not job_dict['STATUS'] in possible_schedule_states:
            saveschedulejob['message'] = \
                'You can only read schedule for jobs with status: %s.'\
                % ' or '.join(possible_schedule_states)
            saveschedulejobs.append(saveschedulejob)
            continue

        # notify queue

        if not send_message_to_grid_script('JOBSCHEDULE ' + job_id
                                           + '\n', logger, configuration):
            output_objects.append({
                'object_type': 'error_text', 'text':
                'Error sending message to grid_script, update may fail.'
            })
            status = returnvalues.SYSTEM_ERROR
            continue

        saveschedulejobs.append(saveschedulejob)

    savescheduleinfo = """Please find any available job schedule status in verbose job status output."""
    output_objects.append({
        'object_type': 'saveschedulejobs',
        'saveschedulejobs': saveschedulejobs,
        'savescheduleinfo': savescheduleinfo
    })
    return (output_objects, status)
def main(client_id, user_arguments_dict):
    """Main function used by front end.

    Applies a job action (looked up in the module-level valid_actions map,
    e.g. cancel/freeze/thaw) to all of the caller's own jobs matching the
    supplied job_id patterns, updating the mRSL STATUS field and notifying
    grid_script.  Returns the usual (output_objects, returnvalue) tuple.
    """

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']
    # last occurrence wins if the field was supplied multiple times
    action = accepted['action'][-1]

    if not safe_handler(configuration, 'post', op_name, client_id,
                        get_csrf_limit(configuration), accepted):
        output_objects.append({
            'object_type': 'error_text', 'text':
            '''Only accepting CSRF-filtered POST requests to prevent unintended updates'''
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not configuration.site_enable_jobs:
        output_objects.append({
            'object_type': 'error_text', 'text':
            '''Job execution is not enabled on this system'''
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    # Reject anything not in the declared action -> new state mapping
    if not action in valid_actions.keys():
        output_objects.append({
            'object_type': 'error_text', 'text':
            'Invalid job action "%s" (only %s supported)'
            % (action, ', '.join(valid_actions.keys()))
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    new_state = valid_actions[action]

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                                     client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.error(
                    '%s tried to use %s %s outside own home! (pattern %s)'
                    % (client_id, op_name, abs_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type': 'error_text', 'text':
                '%s: You do not have any matching job IDs!' % pattern
            })
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job state change is hard on the server, limit

    if len(filelist) > 500:
        output_objects.append({
            'object_type': 'error_text', 'text':
            'Too many matching jobs (%s)!' % len(filelist)
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    changedstatusjobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')
        changedstatusjob = {
            'object_type': 'changedstatusjob',
            'job_id': job_id
        }

        job_dict = unpickle(filepath, logger)
        if not job_dict:
            changedstatusjob['message'] = '''The file containing the information for job id %s could not be opened! You can only %s your own jobs!''' % (job_id, action)
            changedstatusjobs.append(changedstatusjob)
            status = returnvalues.CLIENT_ERROR
            continue

        changedstatusjob['oldstatus'] = job_dict['STATUS']

        # Is the job status compatible with action?

        possible_cancel_states = [
            'PARSE', 'QUEUED', 'RETRY', 'EXECUTING', 'FROZEN'
        ]
        if action == 'cancel' and \
                not job_dict['STATUS'] in possible_cancel_states:
            changedstatusjob['message'] = \
                'You can only cancel jobs with status: %s.'\
                % ' or '.join(possible_cancel_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue

        possible_freeze_states = ['QUEUED', 'RETRY']
        if action == 'freeze' and \
                not job_dict['STATUS'] in possible_freeze_states:
            changedstatusjob['message'] = \
                'You can only freeze jobs with status: %s.'\
                % ' or '.join(possible_freeze_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue

        possible_thaw_states = ['FROZEN']
        if action == 'thaw' and \
                not job_dict['STATUS'] in possible_thaw_states:
            changedstatusjob['message'] = \
                'You can only thaw jobs with status: %s.'\
                % ' or '.join(possible_thaw_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue

        # job action is handled by changing the STATUS field, notifying the
        # job queue and making sure the server never submits jobs with status
        # FROZEN or CANCELED.

        # file is repickled to ensure newest information is used, job_dict
        # might be old if another script has modified the file.

        if not unpickle_and_change_status(filepath, new_state, logger):
            output_objects.append({
                'object_type': 'error_text', 'text':
                'Job status could not be changed to %s!'
                % new_state
            })
            status = returnvalues.SYSTEM_ERROR
            # NOTE(review): no 'continue' here, so grid_script is still
            # notified even when the on-disk status change failed --
            # presumably intentional best-effort; confirm before changing.

        # Avoid key error and make sure grid_script gets expected number of
        # arguments

        if not job_dict.has_key('UNIQUE_RESOURCE_NAME'):
            job_dict['UNIQUE_RESOURCE_NAME'] = \
                'UNIQUE_RESOURCE_NAME_NOT_FOUND'
        if not job_dict.has_key('EXE'):
            job_dict['EXE'] = 'EXE_NAME_NOT_FOUND'

        # notify queue
        # message carries the pre-change status from job_dict['STATUS']

        if not send_message_to_grid_script(
                'JOBACTION ' + job_id + ' ' + job_dict['STATUS'] + ' '
                + new_state + ' ' + job_dict['UNIQUE_RESOURCE_NAME'] + ' '
                + job_dict['EXE'] + '\n', logger, configuration):
            output_objects.append({
                'object_type': 'error_text', 'text':
                '''Error sending message to grid_script, job may still be in the job queue.'''
            })
            status = returnvalues.SYSTEM_ERROR
            continue

        changedstatusjob['newstatus'] = new_state
        changedstatusjobs.append(changedstatusjob)

    output_objects.append({
        'object_type': 'changedstatusjobs',
        'changedstatusjobs': changedstatusjobs
    })
    return (output_objects, status)
def check_mrsl_files(
    configuration,
    job_queue,
    executing_queue,
    only_new,
    logger,
):
    """Check job files on disk in order to initialize job queue after
    (re)start of grid_script.

    Walks all mRSL files under configuration.mrsl_files_dir and, depending
    on each job's STATUS, either re-notifies grid_script (PARSE) or puts the
    job back into job_queue (QUEUED) / executing_queue (EXECUTING).
    With only_new set, files and dirs untouched since the last recorded run
    are skipped.
    """

    # We only check files modified since last start if possible

    last_start = 0
    last_start_file = os.path.join(configuration.mig_system_files,
                                   'grid_script_laststart')
    if os.path.exists(last_start_file):
        last_start = os.path.getmtime(last_start_file)

    check_mrsl_files_start_time = time.time()

    # TODO: switch to listdir or glob? all files are in mrsl_files_dir/*/*.mRSL
    for (root, _, files) in os.walk(configuration.mrsl_files_dir):

        # skip all dot dirs - they are from repos etc and _not_ jobs

        if root.find(os.sep + '.') != -1:
            continue

        # skip all dirs without any recent changes

        if only_new and os.path.getmtime(root) < last_start:
            logger.info('check mRSL files: skipping unchanged dir: %s' %
                        root)
            continue

        logger.info('check mRSL files: inspecting %d files in %s' % \
                    (len(files), root))
        file_count = 0
        for name in files:
            filename = os.path.join(root, name)
            file_count += 1
            # periodic progress logging for big user dirs
            if file_count % 1000 == 0:
                logger.info('check mRSL files: %d files in %s checked' % \
                            (file_count, root))
            if os.path.getmtime(filename) < last_start:
                if only_new:
                    #logger.debug('skipping treated mrsl file: %s'
                    #             % filename)
                    continue
                logger.info('parsing possibly outdated mrsl file: %s' %
                            filename)

            job_dict = io.unpickle(filename, logger)
            if not job_dict:
                logger.error('could not open and unpickle: %s' % filename)
                continue

            if job_dict['STATUS'] == 'PARSE':

                # parse is ok, since mRSL file exists

                # tell 'grid_script' and let grid_script put it into the queue

                logger.info('Found a file with PARSE status: %s'
                            % job_dict['JOB_ID'])
                job_id = job_dict['JOB_ID']
                client_id = job_dict['USER_CERT']
                client_dir = client_id_dir(client_id)
                message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)
                if not send_message_to_grid_script(message, logger,
                                                   configuration):
                    print 'Fatal error: Could not write to grid stdin'
            elif job_dict['STATUS'] == 'QUEUED'\
                    and not job_queue.get_job_by_id(job_dict['JOB_ID']):

                # put in job queue

                logger.info('USERJOBFILE: There were %s jobs in the job_queue'
                            % job_queue.queue_length())
                job_queue.enqueue_job(job_dict, job_queue.queue_length())
                logger.info("Now there's %s (QUEUED job %s added)"
                            % (job_queue.queue_length(),
                               job_dict['JOB_ID']))
            elif job_dict['STATUS'] == 'EXECUTING'\
                    and not executing_queue.get_job_by_id(job_dict['JOB_ID'
                    ]):

                # put in executing queue

                logger.info('USERJOBFILE: There were %s jobs in the executing_queue'
                            % executing_queue.queue_length())
                executing_queue.enqueue_job(job_dict,
                        executing_queue.queue_length())
                logger.info("Now there's %s (EXECUTING job %s added)"
                            % (executing_queue.queue_length(),
                               job_dict['JOB_ID']))
            else:
                # logger.debug('Job in %s is already treated' % filename)
                continue

    # update last_start_file access times. Note the timestamp is not "now" but
    # when check_mrsl_files was called to avoid losing any jobs being parsed
    # at the same time as this function is running.

    logger.info('setting time of last_start_file %s to %s'
                % (last_start_file, check_mrsl_files_start_time))
    # NOTE(review): io.touch is called here as (path, timestamp) -- other
    # call sites pass (path, configuration, timestamp=...); verify the
    # io.touch signature this code tree actually ships.
    io.touch(last_start_file, check_mrsl_files_start_time)
    check_mrsl_files_end_time = time.time()
    logger.info('finished checking for mRSL files in %fs' % \
                (check_mrsl_files_end_time-check_mrsl_files_start_time))
# NOTE(review): fragment — interior of a job submission helper; the enclosing
# def is not visible in this chunk, so names like global_dict, outfile and
# forceddestination are presumably parameters/locals of that function.

# Persist the job dictionary before involving grid_script
if not pickle(global_dict, filename, logger):
    return (False, 'Fatal error: Could not write %s' % filename)

if not outfile == 'AUTOMATIC':

    # an outfile was specified, so this is just for testing - dont tell
    # grid_script

    return (True, '')

# tell 'grid_script'

message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)
if not send_message_to_grid_script(message, logger, configuration):
    return (False,
            '''Fatal error: Could not get exclusive access or write to %s'''
            % configuration.grid_stdin)

if forceddestination and forceddestination.has_key('RE_NAME'):

    # add job_id to runtime environment verification history

    unique_resource_name = forceddestination['UNIQUE_RESOURCE_NAME']
    re_name = forceddestination['RE_NAME']
    resource_config_filename = configuration.resource_home\
        + unique_resource_name + '/config'

    # open resource config
def check_mrsl_files(
    configuration,
    job_queue,
    executing_queue,
    only_new,
    logger,
):
    """Check job files on disk in order to initialize job queue after
    (re)start of grid_script.

    Walks all mRSL files under configuration.mrsl_files_dir and, depending
    on each job's STATUS, either re-notifies grid_script (PARSE) or puts the
    job back into job_queue (QUEUED) / executing_queue (EXECUTING).
    With only_new set, files and dirs untouched since the last recorded run
    are skipped.
    """

    # We only check files modified since last start if possible

    last_start = 0
    last_start_file = os.path.join(configuration.mig_system_files,
                                   'grid_script_laststart')
    if os.path.exists(last_start_file):
        last_start = os.path.getmtime(last_start_file)

    check_mrsl_files_start_time = time.time()

    # TODO: switch to listdir or glob? all files are in mrsl_files_dir/*/*.mRSL
    for (root, _, files) in os.walk(configuration.mrsl_files_dir):

        # skip all dot dirs - they are from repos etc and _not_ jobs

        if root.find(os.sep + '.') != -1:
            continue

        # skip all dirs without any recent changes

        if only_new and os.path.getmtime(root) < last_start:
            logger.info('check mRSL files: skipping unchanged dir: %s' %
                        root)
            continue

        logger.info('check mRSL files: inspecting %d files in %s' % \
                    (len(files), root))
        file_count = 0
        for name in files:
            filename = os.path.join(root, name)
            file_count += 1
            # periodic progress logging for big user dirs
            if file_count % 1000 == 0:
                logger.info('check mRSL files: %d files in %s checked' % \
                            (file_count, root))
            if os.path.getmtime(filename) < last_start:
                if only_new:
                    #logger.debug('skipping treated mrsl file: %s'
                    #             % filename)
                    continue
                logger.info('parsing possibly outdated mrsl file: %s' %
                            filename)

            job_dict = io.unpickle(filename, logger)
            if not job_dict:
                logger.error('could not open and unpickle: %s' % filename)
                continue

            if job_dict['STATUS'] == 'PARSE':

                # parse is ok, since mRSL file exists

                # tell 'grid_script' and let grid_script put it into the queue

                logger.info('Found a file with PARSE status: %s'
                            % job_dict['JOB_ID'])
                job_id = job_dict['JOB_ID']
                client_id = job_dict['USER_CERT']
                client_dir = client_id_dir(client_id)
                message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)
                if not send_message_to_grid_script(message, logger,
                                                   configuration):
                    print 'Fatal error: Could not write to grid stdin'
            elif job_dict['STATUS'] == 'QUEUED'\
                    and not job_queue.get_job_by_id(job_dict['JOB_ID']):

                # put in job queue

                logger.info(
                    'USERJOBFILE: There were %s jobs in the job_queue'
                    % job_queue.queue_length())
                job_queue.enqueue_job(job_dict, job_queue.queue_length())
                logger.info("Now there's %s (QUEUED job %s added)"
                            % (job_queue.queue_length(),
                               job_dict['JOB_ID']))
            elif job_dict['STATUS'] == 'EXECUTING'\
                    and not executing_queue.get_job_by_id(job_dict['JOB_ID'
                    ]):

                # put in executing queue

                logger.info(
                    'USERJOBFILE: There were %s jobs in the executing_queue'
                    % executing_queue.queue_length())
                executing_queue.enqueue_job(job_dict,
                        executing_queue.queue_length())
                logger.info(
                    "Now there's %s (EXECUTING job %s added)"
                    % (executing_queue.queue_length(),
                       job_dict['JOB_ID']))
            else:
                # logger.debug('Job in %s is already treated' % filename)
                continue

    # update last_start_file access times. Note the timestamp is not "now" but
    # when check_mrsl_files was called to avoid losing any jobs being parsed
    # at the same time as this function is running.

    logger.info('setting time of last_start_file %s to %s'
                % (last_start_file, check_mrsl_files_start_time))
    io.touch(last_start_file, configuration,
             timestamp=check_mrsl_files_start_time)
    check_mrsl_files_end_time = time.time()
    logger.info('finished checking for mRSL files in %fs' % \
                (check_mrsl_files_end_time-check_mrsl_files_start_time))
def main(client_id, user_arguments_dict): """Main function used by front end""" (configuration, logger, output_objects, op_name) = \ initialize_main_variables(client_id, op_header=False) defaults = signature()[1] (validate_status, accepted) = validate_input_and_cert( user_arguments_dict, defaults, output_objects, client_id, configuration, allow_rejects=False, ) if not validate_status: return (accepted, returnvalues.CLIENT_ERROR) action = accepted['action'][-1] req_list = accepted['req_id'] job_list = accepted['job_id'] lines = int(accepted['lines'][-1]) meta = '''<meta http-equiv="refresh" content="%s" /> ''' % configuration.sleep_secs style = themed_styles(configuration) script = ''' <script type="text/javascript" src="/images/js/jquery.js"></script> <script type="text/javascript" src="/images/js/jquery.tablesorter.js"></script> <script type="text/javascript" src="/images/js/jquery.tablesorter.pager.js"> </script> <script type="text/javascript" src="/images/js/jquery.tablesorter.widgets.js"></script> <script type="text/javascript" src="/images/js/jquery-ui.js"></script> <script type="text/javascript" src="/images/js/jquery.confirm.js"></script> <script type="text/javascript" > $(document).ready(function() { // init confirmation dialog $( "#confirm_dialog" ).dialog( // see http://jqueryui.com/docs/dialog/ for options { autoOpen: false, modal: true, closeOnEscape: true, width: 500, buttons: { "Cancel": function() { $( "#" + name ).dialog("close"); } } }); // table initially sorted by col. 
9 (created) var sortOrder = [[9,0]]; $("#certreqtable").tablesorter({widgets: ["zebra", "saveSort"], sortList:sortOrder }) .tablesorterPager({ container: $("#pager"), size: %s }); } ); </script> ''' % default_pager_entries title_entry = find_entry(output_objects, 'title') title_entry['text'] = '%s administration panel' % configuration.short_title title_entry['meta'] = meta title_entry['style'] = style title_entry['javascript'] = script output_objects.append({'object_type': 'html_form', 'text':''' <div id="confirm_dialog" title="Confirm" style="background:#fff;"> <div id="confirm_text"><!-- filled by js --></div> <textarea cols="40" rows="4" id="confirm_input" style="display:none;"></textarea> </div> ''' }) if not is_admin(client_id, configuration, logger): output_objects.append( {'object_type': 'error_text', 'text' : 'You must be an admin to access this control panel.'}) return (output_objects, returnvalues.CLIENT_ERROR) html = '' if action and not action in grid_actions.keys() + certreq_actions: output_objects.append({'object_type': 'error_text', 'text' : 'Invalid action: %s' % action}) return (output_objects, returnvalues.SYSTEM_ERROR) if action in grid_actions: msg = "%s" % grid_actions[action] if job_list: msg += ' %s' % ' '.join(job_list) msg += '\n' if not send_message_to_grid_script(msg, logger, configuration): output_objects.append( {'object_type': 'error_text', 'text' : '''Error sending %s message to grid_script.''' % action }) status = returnvalues.SYSTEM_ERROR elif action in certreq_actions: if action == "addcertreq": for req_id in req_list: if accept_cert_req(req_id, configuration): output_objects.append( {'object_type': 'text', 'text': 'Accepted certificate request %s' % req_id}) else: output_objects.append( {'object_type': 'error_text', 'text': 'Accept certificate request failed - details in log' }) elif action == "delcertreq": for req_id in req_list: if delete_cert_req(req_id, configuration): output_objects.append( {'object_type': 'text', 'text': 
'Deleted certificate request %s' % req_id}) else: output_objects.append( {'object_type': 'error_text', 'text': 'Delete certificate request failed - details in log' }) show, drop = '', '' general = """ <h1>Server Status</h1> <p class='importanttext'> This page automatically refreshes every %s seconds. </p> <p> You can see the current grid daemon status and server logs below. The buttons provide access to e.g. managing the grid job queues. </p> <form method='get' action='migadmin.py'> <input type='hidden' name='action' value='' /> <input type='submit' value='Show last log lines' /> <input type='text' size='2' name='lines' value='%s' /> </form> <br /> <form method='get' action='migadmin.py'> <input type='hidden' name='lines' value='%s' /> <input type='hidden' name='action' value='reloadconfig' /> <input type='submit' value='Reload Configuration' /> </form> <br /> """ % (configuration.sleep_secs, lines, lines) show += """ <form method='get' action='migadmin.py'> <input type='hidden' name='lines' value='%s' /> <input type='submit' value='Log Jobs' /> <select name='action'> """ % lines drop += """ <form method='get' action='migadmin.py'> <input type='hidden' name='lines' value='%s' /> <input type='submit' value='Drop Job' /> <select name='action'> """ % lines for queue in ['queued', 'executing', 'done']: selected = '' if action.find(queue) != -1: selected = 'selected' show += "<option %s value='show%s'>%s</option>" % (selected, queue, queue) drop += "<option %s value='drop%s'>%s</option>" % (selected, queue, queue) show += """ </select> </form> <br /> """ drop += """ </select> <input type='text' size='20' name='job_id' value='' /> </form> <br /> """ html += general html += show html += drop daemons = """ <div id='daemonstatus'> """ daemon_names = ['grid_script.py', 'grid_monitor.py', 'grid_sshmux.py'] # No need to run im_notify unless any im notify protocols are enabled if [i for i in configuration.notify_protocols if i != 'email']: 
daemon_names.append('grid_imnotify.py') if configuration.site_enable_sftp: daemon_names.append('grid_sftp.py') if configuration.site_enable_davs: daemon_names.append('grid_webdavs.py') if configuration.site_enable_ftps: daemon_names.append('grid_ftps.py') if configuration.site_enable_openid: daemon_names.append('grid_openid.py') for proc in daemon_names: pgrep_proc = subprocess.Popen(['pgrep', '-f', proc], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) pgrep_proc.wait() ps_out = pgrep_proc.stdout.read().strip() if pgrep_proc.returncode == 0: daemons += "<div class='status_online'>%s running (pid %s)</div>" \ % (proc, ps_out) else: daemons += "<div class='status_offline'>%s not running!</div>" % \ proc daemons += """</div> <br /> """ html += daemons output_objects.append({'object_type': 'header', 'text' : 'Pending Certificate Requests'}) (status, ret) = list_cert_reqs(configuration) if not status: logger.error("%s: failed for '%s': %s" % (op_name, client_id, ret)) output_objects.append({'object_type': 'error_text', 'text' : ret}) return (output_objects, returnvalues.SYSTEM_ERROR) certreqs = [] for req_id in ret: (load_status, req_dict) = get_cert_req(req_id, configuration) if not load_status: logger.error("%s: load failed for '%s': %s" % \ (op_name, req_id, req_dict)) output_objects.append({'object_type': 'error_text', 'text' : 'Could not read details for "%s"' % \ req_id}) return (output_objects, returnvalues.SYSTEM_ERROR) req_item = build_certreqitem_object(configuration, req_dict) js_name = 'create%s' % req_id helper = html_post_helper(js_name, 'migadmin.py', {'action': 'addcertreq', 'req_id': req_id}) output_objects.append({'object_type': 'html_form', 'text': helper}) req_item['addcertreqlink'] = { 'object_type': 'link', 'destination': "javascript: confirmDialog(%s, '%s');" % \ (js_name, 'Really accept %s?' 
% req_id), 'class': 'addlink', 'title': 'Accept %s' % req_id, 'text': ''} js_name = 'delete%s' % req_id helper = html_post_helper(js_name, 'migadmin.py', {'action': 'delcertreq', 'req_id': req_id}) output_objects.append({'object_type': 'html_form', 'text': helper}) req_item['delcertreqlink'] = { 'object_type': 'link', 'destination': "javascript: confirmDialog(%s, '%s');" % \ (js_name, 'Really remove %s?' % req_id), 'class': 'removelink', 'title': 'Remove %s' % req_id, 'text': ''} certreqs.append(req_item) output_objects.append({'object_type': 'table_pager', 'entry_name': 'pending certificate requests', 'default_entries': default_pager_entries}) output_objects.append({'object_type': 'certreqs', 'certreqs': certreqs}) log_path_list = [] if os.path.isabs(configuration.logfile): log_path_list.append(configuration.logfile) else: log_path_list.append(os.path.join(configuration.log_dir, configuration.logfile)) for log_path in log_path_list: html += ''' <h1>%s</h1> <textarea rows=%s cols=200 readonly="readonly"> ''' % (log_path, lines) try: logger.debug("loading %d lines from %s" % (lines, log_path)) log_fd = open(log_path, 'r') log_fd.seek(0, os.SEEK_END) size = log_fd.tell() pos = log_fd.tell() log_lines = [] step_size = 100 # locate last X lines while pos > 0 and len(log_lines) < lines: offset = min(lines * step_size, size) logger.debug("seek to offset %d from end of %s" % (offset, log_path)) log_fd.seek(-offset, os.SEEK_END) pos = log_fd.tell() log_lines = log_fd.readlines() step_size *= 2 logger.debug("reading %d lines from %s" % (lines, log_path)) html += ''.join(log_lines[-lines:]) log_fd.close() except Exception, exc: logger.error("reading %d lines from %s: %s" % (lines, log_path, exc)) output_objects.append({'object_type': 'error_text', 'text' : 'Error reading log (%s)' % exc}) return (output_objects, returnvalues.SYSTEM_ERROR) html += '''</textarea>
def main(client_id, user_arguments_dict):
    """Main function used by front end.

    Asks grid_script to refresh the schedule information for all of the
    caller's jobs matching the supplied job_id patterns. The schedule hint
    itself is later available through verbose job status output.

    Returns the usual (output_objects, returnvalue) tuple.
    """

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    if not correct_handler('POST'):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : 'Only accepting POST requests to prevent unintended updates'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']

    # Schedule refresh is hard on the server, so cap matching jobs per call
    max_schedule_jobs = 100

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, real_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % pattern})
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job schedule is hard on the server, limit

    if len(filelist) > max_schedule_jobs:
        output_objects.append({'object_type': 'error_text', 'text'
                              : 'Too many matching jobs (%s)!'
                               % len(filelist)})
        return (output_objects, returnvalues.CLIENT_ERROR)

    saveschedulejobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')

        saveschedulejob = {'object_type': 'saveschedulejob',
                           'job_id': job_id}
        # NOTE: use job_dict rather than shadowing the dict builtin
        job_dict = unpickle(filepath, logger)
        if not job_dict:
            saveschedulejob['message'] = \
                ('The file containing the information' \
                 ' for job id %s could not be opened!' \
                 ' You can only read schedule for ' \
                 'your own jobs!') % job_id
            saveschedulejobs.append(saveschedulejob)
            status = returnvalues.CLIENT_ERROR
            continue

        saveschedulejob['oldstatus'] = job_dict['STATUS']

        # Is the job status pending?

        possible_schedule_states = ['QUEUED', 'RETRY', 'FROZEN']
        if not job_dict['STATUS'] in possible_schedule_states:
            saveschedulejob['message'] = \
                'You can only read schedule for jobs with status: %s.'\
                 % ' or '.join(possible_schedule_states)
            saveschedulejobs.append(saveschedulejob)
            continue

        # notify queue

        if not send_message_to_grid_script('JOBSCHEDULE ' + job_id
                + '\n', logger, configuration):
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error sending message to grid_script, update may fail.'
                 })
            status = returnvalues.SYSTEM_ERROR
            continue

        saveschedulejobs.append(saveschedulejob)

    savescheduleinfo = """Please find any available job schedule status in
verbose job status output."""
    output_objects.append({'object_type': 'saveschedulejobs',
                          'saveschedulejobs': saveschedulejobs,
                          'savescheduleinfo': savescheduleinfo})
    return (output_objects, status)
def main(client_id, user_arguments_dict):
    """Main function used by front end.

    Renders the site administration panel: optional grid_script / account
    request actions, daemon status via pgrep, pending account requests and
    the tail of the configured server log. Admin access is required.
    """

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)
    # Last occurrence wins for single-value fields; req_id/job_id are lists
    action = accepted['action'][-1]
    req_list = accepted['req_id']
    job_list = accepted['job_id']
    lines = int(accepted['lines'][-1])
    # Auto-refresh the page every sleep_secs seconds via meta tag
    meta = '''<meta http-equiv="refresh" content="%s" />
''' % configuration.sleep_secs
    title_entry = find_entry(output_objects, 'title')
    title_entry['text'] = '%s administration panel' % configuration.short_title
    title_entry['meta'] = meta

    # jquery support for tablesorter and confirmation on "remove"
    # table initially sorted by col. 9 (created)
    table_spec = {'table_id': 'accountreqtable', 'sort_order': '[[9,0]]'}
    (add_import, add_init, add_ready) = man_base_js(configuration,
                                                    [table_spec])
    title_entry['script']['advanced'] += add_import
    title_entry['script']['init'] += add_init
    title_entry['script']['ready'] += add_ready
    output_objects.append({
        'object_type': 'html_form',
        'text': man_base_html(configuration)
    })
    if not is_admin(client_id, configuration, logger):
        output_objects.append({
            'object_type': 'error_text',
            'text': 'You must be an admin to access this control panel.'
        })
        return (output_objects, returnvalues.CLIENT_ERROR)
    html = ''
    # Reject unknown actions up front; empty action just renders the panel
    if action and not action in grid_actions.keys() + accountreq_actions:
        output_objects.append({
            'object_type': 'error_text',
            'text': 'Invalid action: %s' % action
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)
    if action in grid_actions:
        # Forward grid action (optionally with job ids) to grid_script queue
        msg = "%s" % grid_actions[action]
        if job_list:
            msg += ' %s' % ' '.join(job_list)
        msg += '\n'
        if not send_message_to_grid_script(msg, logger, configuration):
            output_objects.append({
                'object_type': 'error_text',
                'text': '''Error sending %s message to grid_script.''' % action
            })
            # NOTE(review): status is assigned here but never returned below -
            # the function still ends with returnvalues.OK; confirm intended
            status = returnvalues.SYSTEM_ERROR
    elif action in accountreq_actions:
        # Accept or delete each listed pending account request
        if action == "addaccountreq":
            for req_id in req_list:
                if accept_account_req(req_id, configuration):
                    output_objects.append({
                        'object_type': 'text',
                        'text': 'Accepted account request %s' % req_id
                    })
                else:
                    output_objects.append({
                        'object_type': 'error_text',
                        'text':
                        'Accept account request failed - details in log'
                    })
        elif action == "delaccountreq":
            for req_id in req_list:
                if delete_account_req(req_id, configuration):
                    output_objects.append({
                        'object_type': 'text',
                        'text': 'Deleted account request %s' % req_id
                    })
                else:
                    output_objects.append({
                        'object_type': 'error_text',
                        'text':
                        'Delete account request failed - details in log'
                    })
    # Build the static panel HTML: general controls plus show/drop job forms
    show, drop = '', ''
    general = """
<h2>Server Status</h2>
<p class='importanttext'>
This page automatically refreshes every %s seconds.
</p>
<p>
You can see the current grid daemon status and server logs below. The buttons
provide access to e.g. managing the grid job queues.
</p>
<form method='get' action='migadmin.py'>
    <input type='hidden' name='action' value='' />
    <input type='submit' value='Show last log lines' />
    <input type='text' size='2' name='lines' value='%s' />
</form>
<br />
<form method='get' action='migadmin.py'>
    <input type='hidden' name='lines' value='%s' />
    <input type='hidden' name='action' value='reloadconfig' />
    <input type='submit' value='Reload Configuration' />
</form>
<br />
""" % (configuration.sleep_secs, lines, lines)
    show += """
<form method='get' action='migadmin.py'>
    <input type='hidden' name='lines' value='%s' />
    <input type='submit' value='Log Jobs' />
    <select name='action'>
""" % lines
    drop += """
<form method='get' action='migadmin.py'>
    <input type='hidden' name='lines' value='%s' />
    <input type='submit' value='Drop Job' />
    <select name='action'>
""" % lines
    # Pre-select the queue matching the current action in both dropdowns
    for queue in ['queued', 'executing', 'done']:
        selected = ''
        if action.find(queue) != -1:
            selected = 'selected'
        show += "<option %s value='show%s'>%s</option>" % (selected, queue,
                                                           queue)
        drop += "<option %s value='drop%s'>%s</option>" % (selected, queue,
                                                           queue)
    show += """
    </select>
</form>
<br />
"""
    drop += """
    </select>
    <input type='text' size='20' name='job_id' value='' />
</form>
<br />
"""
    html += general
    html += show
    html += drop
    daemons = """
<div id='daemonstatus'>
"""
    # Collect the daemon process names to probe, based on enabled site features
    daemon_names = []
    if configuration.site_enable_jobs:
        daemon_names += ['grid_script.py', 'grid_monitor.py',
                         'grid_sshmux.py']
    if configuration.site_enable_events:
        daemon_names.append('grid_events.py')
    # No need to run im_notify unless any im notify protocols are enabled
    if configuration.site_enable_imnotify and \
            [i for i in configuration.notify_protocols if i != 'email']:
        daemon_names.append('grid_imnotify.py')
    if configuration.site_enable_sftp:
        daemon_names.append('grid_sftp.py')
    if configuration.site_enable_davs:
        daemon_names.append('grid_webdavs.py')
    if configuration.site_enable_ftps:
        daemon_names.append('grid_ftps.py')
    if configuration.site_enable_openid:
        daemon_names.append('grid_openid.py')
    if configuration.site_enable_transfers:
        daemon_names.append('grid_transfers.py')
    if configuration.site_enable_crontab:
        daemon_names.append('grid_cron.py')
    if configuration.site_enable_seafile:
        daemon_names += [
            'seafile-controller', 'seaf-server', 'ccnet-server', 'seahub'
        ]
        if configuration.seafile_mount:
            daemon_names.append('seaf-fuse')
    if configuration.site_enable_sftp_subsys:
        daemon_names.append(
            '/sbin/sshd -f /etc/ssh/sshd_config-MiG-sftp-subsys')
    # Probe each daemon with pgrep -f and report online/offline status
    for proc in daemon_names:
        # NOTE: we use command list here to avoid shell requirement
        pgrep_proc = subprocess_popen(['pgrep', '-f', proc],
                                      stdout=subprocess_pipe,
                                      stderr=subprocess_stdout)
        pgrep_proc.wait()
        ps_out = pgrep_proc.stdout.read().strip()
        if pgrep_proc.returncode == 0:
            daemons += "<div class='status_online'>%s running (pid %s)</div>" \
                % (proc, ps_out)
        else:
            daemons += "<div class='status_offline'>%s not running!</div>" % \
                proc
    daemons += """</div>
<br />
"""
    html += daemons
    output_objects.append({
        'object_type': 'header',
        'text': 'Pending Certificate Requests'
    })
    (list_status, ret) = list_account_reqs(configuration)
    if not list_status:
        logger.error("%s: failed for '%s': %s" % (op_name, client_id, ret))
        output_objects.append({'object_type': 'error_text', 'text': ret})
        return (output_objects, returnvalues.SYSTEM_ERROR)
    # CSRF token shared by the per-request accept/remove POST helpers below
    form_method = 'post'
    csrf_limit = get_csrf_limit(configuration)
    target_op = 'migadmin'
    csrf_token = make_csrf_token(configuration, form_method, target_op,
                                 client_id, csrf_limit)
    accountreqs = []
    for req_id in ret:
        (load_status, req_dict) = get_account_req(req_id, configuration)
        if not load_status:
            logger.error("%s: load failed for '%s': %s" % (op_name, req_id,
                                                           req_dict))
            output_objects.append({
                'object_type': 'error_text',
                'text': 'Could not read details for "%s"' % req_id
            })
            return (output_objects, returnvalues.SYSTEM_ERROR)
        req_item = build_accountreqitem_object(configuration, req_dict)
        # Hidden POST form + confirm dialog link for accepting this request
        js_name = 'create%s' % req_id
        helper = html_post_helper(js_name, '%s.py' % target_op, {
            'action': 'addaccountreq',
            'req_id': req_id,
            csrf_field: csrf_token
        })
        output_objects.append({'object_type': 'html_form', 'text': helper})
        req_item['addaccountreqlink'] = {
            'object_type': 'link',
            'destination':
            "javascript: confirmDialog(%s, '%s');" %
            (js_name, 'Really accept %s?' % req_id),
            'class': 'addlink iconspace',
            'title': 'Accept %s' % req_id,
            'text': ''
        }
        # Hidden POST form + confirm dialog link for removing this request
        js_name = 'delete%s' % req_id
        helper = html_post_helper(js_name, '%s.py' % target_op, {
            'action': 'delaccountreq',
            'req_id': req_id,
            csrf_field: csrf_token
        })
        output_objects.append({'object_type': 'html_form', 'text': helper})
        req_item['delaccountreqlink'] = {
            'object_type': 'link',
            'destination':
            "javascript: confirmDialog(%s, '%s');" %
            (js_name, 'Really remove %s?' % req_id),
            'class': 'removelink iconspace',
            'title': 'Remove %s' % req_id,
            'text': ''
        }
        accountreqs.append(req_item)
    output_objects.append({
        'object_type': 'table_pager',
        'entry_name': 'pending certificate/OpenID account requests',
        'default_entries': default_pager_entries
    })
    output_objects.append({
        'object_type': 'accountreqs',
        'accountreqs': accountreqs
    })
    # Append the tail of the configured server log in a readonly textarea
    log_path_list = []
    if os.path.isabs(configuration.logfile):
        log_path_list.append(configuration.logfile)
    else:
        log_path_list.append(
            os.path.join(configuration.log_dir, configuration.logfile))
    for log_path in log_path_list:
        html += '''
<h2>%s</h2>
<textarea class="fillwidth padspace" rows=%s readonly="readonly">
''' % (log_path, lines)
        log_lines = read_tail(log_path, lines, logger)
        html += ''.join(log_lines[-lines:])
        html += '''</textarea>
'''
    output_objects.append({'object_type': 'html_form', 'text': html})
    return (output_objects, returnvalues.OK)
def job_api_update(configuration, workflow_session, job_type=JOB,
                   **job_attributes):
    """
    Handler for 'update' calls to job API.
    :param configuration: The MiG configuration object.
    :param workflow_session: The MiG workflow session. This must contain the
    key 'owner'
    :param job_type: [optional] A MiG job type. Default is 'job'.
    :param job_attributes: dictionary of arguments used to update the
    specified workflow object. Currently can only be a job id to cancel.
    :return: Tuple (boolean, string)
    If the given job_type is valid a tuple is returned with True in the first
    value and a feedback message in the second. Else, a tuple is returned with
    a first value of False, and an explanatory error message as the second
    value.
    """
    _logger = configuration.logger
    client_id = workflow_session['owner']

    job_id = job_attributes.get('JOB_ID', None)
    if not job_id:
        msg = "No job id provided in update"
        _logger.error(msg)
        return (False, msg)

    if 'vgrid' not in job_attributes:
        return (False, "Can't update job without 'vgrid' attribute")
    vgrid = job_attributes['vgrid']

    # User is vgrid owner or member
    success, msg, _ = init_vgrid_script_list(vgrid, client_id,
                                             configuration)
    if not success:
        return (False, msg)

    status, job = get_job_with_id(configuration, job_id, vgrid, client_id,
                                  only_user_jobs=False)

    if not status:
        msg = "Could not open job file for job '%s'" % job_id
        _logger.error(msg)
        return (False, msg)

    if 'STATUS' in job_attributes:
        new_state = 'CANCELED'
        if job_attributes['STATUS'] == new_state:
            # Only jobs not yet finished/failed can be canceled
            possible_cancel_states = ['PARSE', 'QUEUED', 'RETRY',
                                      'EXECUTING', 'FROZEN']

            if not job['STATUS'] in possible_cancel_states:
                msg = 'Could not cancel job with status ' \
                      + job['STATUS']
                _logger.error(msg)
                return (False, msg)

            # Flip the status in the pickled mRSL job file on disk
            job_user_dir = client_id_dir(job['USER_CERT'])
            file_path = os.path.join(configuration.mrsl_files_dir,
                                     job_user_dir, job_id + '.mRSL')
            if not unpickle_and_change_status(file_path, new_state, _logger):
                _logger.error('%s could not cancel job: %s'
                              % (client_id, job_id))
                msg = 'Could not change status of job ' + job_id
                _logger.error(msg)
                return (False, msg)

            # Fall back to placeholders so the grid_script message below
            # always carries all expected fields
            if 'UNIQUE_RESOURCE_NAME' not in job:
                job['UNIQUE_RESOURCE_NAME'] = \
                    'UNIQUE_RESOURCE_NAME_NOT_FOUND'
            if 'EXE' not in job:
                job['EXE'] = 'EXE_NAME_NOT_FOUND'

            # Notify grid_script so queues and executing resources catch up
            message = 'JOBACTION ' + job_id + ' ' \
                      + job['STATUS'] + ' ' + new_state + ' ' \
                      + job['UNIQUE_RESOURCE_NAME'] + ' ' \
                      + job['EXE'] + '\n'
            if not send_message_to_grid_script(message, _logger,
                                               configuration):
                msg = '%s failed to send message to grid script: %s' \
                      % (client_id, message)
                _logger.error(msg)
                return (False, msg)
            return (True, 'Job %s has been successfully canceled' % job_id)

    return (False, "No update applied from attributes '%s'" % job_attributes)