def update_section_helper(client_id, configuration, section_filename, changes,
                          defaults, create_missing=True):
    """Update settings section in pickled file with values from changes
    dictionary. Optional create_missing can be used if the pickle should be
    created if not already there. The defaults dictionary is used to set any
    missing values.
    """
    client_dir = client_id_dir(client_id)
    section_path = os.path.join(configuration.user_settings, client_dir,
                                section_filename)
    # Load existing section or start from scratch if allowed
    if os.path.exists(section_path):
        section_dict = unpickle(section_path, configuration.logger)
    elif create_missing:
        section_dict = {}
    else:
        raise Exception('no %s file to update!' % section_filename)
    # Fill any holes with defaults before applying the requested changes
    for (key, val) in defaults.items():
        section_dict[key] = section_dict.get(key, val)
    section_dict.update(changes)
    if not pickle(section_dict, section_path, configuration.logger):
        raise Exception('could not save updated %s file!' % section_filename)
    return section_dict
def save_twofactor_session(configuration, client_id, session_key, user_addr,
                           user_agent, session_start, session_end=-1):
    """Save twofactor session dict for client_id"""
    _logger = configuration.logger
    if configuration.site_enable_gdp:
        # GDP sites alias client IDs - always store under the base client ID
        client_id = get_base_client_id(configuration, client_id,
                                       expand_oid_alias=False)
    session_path = os.path.join(configuration.twofactor_home, session_key)
    # A negative session_end means "use default TTL from session_start"
    if session_end < 0:
        session_end = session_start + twofactor_cookie_ttl
    session_data = {'client_id': client_id, 'session_key': session_key,
                    'user_addr': user_addr, 'user_agent': user_agent,
                    'session_start': session_start,
                    'session_end': session_end}
    status = pickle(session_data, session_path, configuration.logger)
    if status and configuration.site_twofactor_strict_address:
        # Strict address mode additionally exposes the session under an
        # address-qualified symlink (ADDR_KEY) so lookups can be bound to
        # the client address
        session_path_link = os.path.join(configuration.twofactor_home,
                                         "%s_%s" % (user_addr, session_key))
        status = \
            make_symlink(session_key, session_path_link, _logger, force=False)
        if not status:
            # Roll back the plain session file if the symlink failed so we
            # never leave a session without its strict-address counterpart
            delete_file(session_path, _logger)
    return status
def save_queue(queue, path, logger):
    """Save job queue to path for quick loading later"""
    # Strip the unpicklable logger reference before dumping to disk
    queue.logger = None
    result = io.pickle(queue, path, logger)
    return result
def migrate_job(config, job, peer):
    """Migrate job to the peer server: re-pickles the job under the local
    mRSL tree and would upload it to peer['fqdn'] - the actual upload is
    currently disabled, so this always bails out with False after saving.
    Returns True only when migration (including the disabled upload step)
    succeeds.
    """
    protocol = 'https'
    port = ''
    server = peer['fqdn']

    # Remove schedule hint from job before migration
    del job['SCHEDULE_HINT']

    # Make sure legacy jobs don't fail
    if not job.has_key('MIGRATE_COUNT'):
        job['MIGRATE_COUNT'] = str(0)

    # Add or increment migration counter
    migrate_count = int(job['MIGRATE_COUNT']) + 1
    job['MIGRATE_COUNT'] = str(migrate_count)

    # TODO: only upload if job is not already replicated at
    # remote server
    # TMP!
    steal_job = False

    if not steal_job:
        # upload pickled job to server
        client_dir = client_id_dir(job['USER_CERT'])
        mrsl_filename = config.mrsl_files_dir + client_dir + '/'\
            + job['JOB_ID'] + '.mRSL'
        result = pickle(job, mrsl_filename, config.logger)
        if not result:
            config.logger.error('Aborting migration of job %s (%s)',
                                job['JOB_ID'], result)
            return False

        dest = mrsl_filename

        # TMP!
        # upload_reply = put_data(config, mrsl_filename, protocol, server, port, dest)
        # NOTE: upload intentionally stubbed out with a failure reply until
        # migration is fully supported
        config.logger.warning('Actual migration disabled until fully supported'
                              )
        upload_reply = (-1,
                        'Actual migration disabled until fully supported'
                        )
        if upload_reply[0] != http_success:
            return False

    # migration_msg = ""
    # migration_reply = put_data(config, protocol, server, port, migration_msg)

    return True
def __flush(self): """Dumps the statistics to disk and clears memory""" # Flush dict to file in the statistics for stat_type in self.__gridstat_dict.keys(): for stat_value in self.__gridstat_dict[stat_type].keys(): filename = self.__configuration.gridstat_files_dir\ + stat_type + os.sep + stat_value + '.pck' filedir = os.path.dirname(filename) if not os.path.exists(filedir): os.makedirs(filedir) pickle(self.__gridstat_dict[stat_type][stat_value], filename, self.__logger) # When dict has been flushed, clear it to prevent heavy memory load self.__gridstat_dict = {}
def initialize_and_get_display_dict_filename(configuration, logger):
    """Locate the pickled live displays dict, creating an empty one on
    first use.

    Returns a (status, value) tuple: (True, filename) on success, or
    (False, error message) if the initial pickle could not be written.
    """
    filename = os.path.join(configuration.mig_server_home, 'livedisplaysdict')
    if os.path.isfile(filename):
        return (True, filename)
    logger.info(
        'display dict file %s not found, pickling a new with {} as only content'
        % filename)
    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = {}
    pickle_status = pickle(display_dict, filename, logger)
    if not pickle_status:
        return (False, 'could not pickle %s when initializing' % filename)
    return (True, filename)
def initialize_and_get_display_dict_filename(configuration, logger):
    """Locate the pickled live displays dict, creating an empty one on
    first use.

    Returns a (status, value) tuple: (True, filename) on success, or
    (False, error message) if the initial pickle could not be written.
    """
    # NOTE: use os.path.join rather than manual os.sep concatenation
    filename = os.path.join(configuration.mig_server_home, 'livedisplaysdict')
    if os.path.isfile(filename):
        return (True, filename)
    logger.info('display dict file %s not found, pickling a new with {} as only content'
                % filename)
    # NOTE: renamed from 'dict' to avoid shadowing the builtin
    display_dict = {}
    pickle_status = pickle(display_dict, filename, logger)
    if not pickle_status:
        return (False, 'could not pickle %s when initializing' % filename)
    return (True, filename)
def _save_rate_limits(configuration, proto, rate_limits, do_lock=True):
    """Save rate limits dict"""
    logger = configuration.logger
    rate_limits_filepath = os.path.join(
        configuration.mig_system_run,
        "%s.%s" % (proto, _rate_limits_filename))
    # Serialize writers unless the caller already holds the lock
    lock_handle = None
    if do_lock:
        lock_handle = _acquire_rate_limits_lock(configuration, proto)
    result = pickle(rate_limits, rate_limits_filepath, logger)
    if do_lock:
        _release_rate_limits_lock(lock_handle)
    if not result:
        logger.error("failed to save %s rate limits to %s" %
                     (proto, rate_limits_filepath))
    return result
def remove_item_from_pickled_list(
    path,
    item,
    logger,
    allow_empty_list=True,
):
    """Remove item from the pickled list stored at path and save it back.

    Returns a (status, output) tuple with status False and an explanatory
    message on any failure. With allow_empty_list=False the last remaining
    item cannot be removed.
    """

    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:

        # OK, if the list is empty

        pass
    elif not list_:
        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item is in the list

    item = item.strip()
    # NOTE: idiomatic 'not in' instead of 'not item in'
    if item not in list_:
        output += '%s not found in list' % item
        return (False, output)
    if not allow_empty_list:
        if len(list_) <= 1:
            output += 'You cannot remove the last item'
            return (False, output)

    # ok, lets remove the item and pickle and save the new list

    try:
        list_.remove(item)
    except Exception:
        # NOTE: narrowed from a bare except so KeyboardInterrupt/SystemExit
        # are no longer swallowed here
        output += \
            'Strange error, %s could not be removed, but it seems to be in the list'\
            % item
        return (False, output)
    status = pickle(list_, path, logger)
    if not status:
        output += 'Error pickling new owners file'
        return (False, output)
    return (True, output)
def migrated_job(filename, client_id, configuration):
    """returns a tuple (bool status, str msg)"""

    logger = configuration.logger
    client_dir = client_id_dir(client_id)
    job_path = os.path.abspath(os.path.join(configuration.server_home,
                               client_dir, filename))

    # unpickle and enqueue received job file
    # NOTE(review): escapes already-escaped spaces in the path - presumably
    # to survive later shell/grid_script handling; confirm against callers

    job_path_spaces = job_path.replace('\\ ', '\\\\\\ ')
    job = io.unpickle(job_path_spaces, configuration.logger)

    # TODO: update any fields to mark migration?

    if not job:
        return (False,
                'Fatal migration error: loading pickled job (%s) failed! ' % \
                job_path_spaces)
    job_id = job['JOB_ID']

    # save file with other mRSL files

    mrsl_filename = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir, job_id + '.mRSL'))
    if not io.pickle(job, mrsl_filename, configuration.logger):
        return (False, 'Fatal error: Could not write ' + filename)

    # tell 'grid_script'
    # Notify the grid daemon that a new server job file is ready

    message = 'SERVERJOBFILE ' + client_dir + '/' + job_id + '\n'
    if not io.send_message_to_grid_script(message, logger, configuration):
        return (False, 'Fatal error: Could not write to grid stdin')

    # TODO: do we need to wait for grid_script to ack job reception?
    # ... same question applies to new_job, btw.

    return (True, '%s succesfully migrated.' % job_id)
def _save_sessions(configuration, proto, sessions, do_lock=True):
    """Save sessions dict.

    With do_lock (default) the exclusive sessions lock is held around the
    pickle so concurrent writers cannot corrupt the file.
    """
    logger = configuration.logger
    # NOTE: removed stray 'do_lock = None' which silently disabled locking
    # regardless of the do_lock argument (cf. _save_rate_limits)
    sessions_filepath = os.path.join(configuration.mig_system_run,
                                     "%s.%s" % (proto, _sessions_filename))
    if do_lock:
        sessions_lock = _acquire_sessions_lock(
            configuration, proto, exclusive=True)
    result = pickle(sessions, sessions_filepath, logger)
    if do_lock:
        _release_sessions_lock(sessions_lock)
    if not result:
        logger.error("failed to save active %s sessions to %s" %
                     (proto, sessions_filepath))
    return result
def add_item_to_pickled_list(path, item, logger):
    """Append item to the pickled list at path, rejecting duplicates.

    Returns a (status, output) tuple with status False and a message on
    failure.
    """
    output = ''
    list_ = unpickle(path, logger)
    if list_ == []:
        # empty list unpickles as falsy but is perfectly valid
        pass
    elif not list_:
        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Refuse duplicates
    if item in list_:
        output += '%s is already in the list' % item
        return (False, output)

    # append the new item and save the updated list back to disk
    list_.append(item)
    if not pickle(list_, path, logger):
        output += 'pickle error'
        return (False, output)
    return (True, '')
def main():
    """Fill vgrids and triggers dicts from the entries in SETTINGS_LIST and
    pickle the combined result to VGRID_DICT_FILE.

    Returns 0 on success and 1 on any failure.
    """
    configuration = get_configuration_object()

    # Overwrite default logger

    logger = configuration.logger = get_logger(logging.INFO)
    vgrids_dict = {}
    logger.info('==================== Filling vgrids ====================')
    # NOTE: initialize status so an empty settings file cannot leave it
    # unbound and raise NameError further down
    status = True
    fh = open(SETTINGS_LIST)
    try:
        for line in fh:
            line = line.strip()
            if len(line) > 0:
                status = fill_vgrids(configuration, line, vgrids_dict)
                if not status:
                    break
    finally:
        # Always release the file handle, even if fill_vgrids raises
        fh.close()
    if status:
        logger.info(
            '==================== Filling triggers ====================')
        status = fill_triggers(configuration, vgrids_dict)
    if status:
        logger.info(
            '==================== Writing triggers dict ====================')
        logger.info("'Pickle to file: '%s'" % VGRID_DICT_FILE)
        status = pickle(vgrids_dict, VGRID_DICT_FILE, logger)
    if status:
        return 0
    else:
        return 1
# remove entry from dict and pickle it dict = unpickle(filename, logger) if dict == False: return (False, 'could not unpickle %s' % filename) if not dict.has_key(display_number): return (False, 'display %s not found in dict' % display_number) try: del dict[display_number] except Exception, e: return (False, 'exception trying to remove %s from display dict. Exception %s' % (display_number, e)) pickle_status = pickle(dict, filename, logger) if not pickle_status: return (False, 'could not pickle %s when removing %s' % (filename, display_number)) return (True, '') def get_dict_from_display_number(display_number, configuration, logger): (init_ret, filename) = \ initialize_and_get_display_dict_filename(configuration, logger) if not init_ret: return (False, 'could not initialize') dict = unpickle(filename, logger) if dict == False:
def set_user_display_active(
    client_id,
    display_number,
    vnc_port,
    password,
    configuration,
    logger,
):
    """Register display_number as actively used by client_id in the shared
    pickled display dict, storing the vnc_port and password alongside.

    Fails if the display is already held by another user or if client_id
    already has a different display registered. Returns (status, msg).
    """
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')
    # Look up current owner (if any) of the requested display
    (dis_ret, dis_dict) = get_dict_from_display_number(display_number,
                                                       configuration, logger)
    if not dis_ret:
        return (False, 'dict error, %s' % dis_dict)
    # NOTE(review): -1 appears to mean "display not registered"; confirm
    # against get_dict_from_display_number
    if dis_dict != -1:
        if dis_dict['client_id'] != client_id:
            # display occupied by another user!
            return (False, 'display %s already in use by another user!'
                    % display_number)
    # getting here means display is free or used by client_id
    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)
    current_display = get_users_display_number(client_id, configuration,
                                               logger)
    if not current_display:
        # register display
        dict[display_number] = {
            'client_id': client_id,
            'vnc_port': vnc_port,
            'password': password
        }
        pickle_status = pickle(dict, filename, logger)
        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                    % (filename, dict[display_number]))
        logger.info(
            'successfuly registered that display %s is in use by %s in %s'
            % (display_number, client_id, filename))
        return (True, '')
    if current_display != display_number and current_display != -1:
        # problems.. user already holds a different display
        return (
            False,
            'set_user_display_active met a conflict, can not set display %s when user already has %s registered'
            % (display_number, current_display))
    else:
        # add display to dict (re-registration of the same display)
        dict[display_number] = {
            'client_id': client_id,
            'vnc_port': vnc_port,
            'password': password
        }
        pickle_status = pickle(dict, filename, logger)
        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                    % (filename, dict[display_number]))
        logger.info(
            'successfuly registered that display %s is in use by %s in %s %s'
            % (display_number, client_id, dict, filename))
        return (True, '')
def create_empty_job(
    unique_resource_name, exe, request_cputime, sleep_factor, localjobname,
    execution_delay, configuration, logger
):
    """Helper to create empty job for idle resources"""
    job_dict = {"": ""}
    helper_dict_filename = os.path.join(
        configuration.resource_home, unique_resource_name,
        "empty_job_helper_dict.%s" % exe
    )

    # Scale the configured empty-job cputime by sleep_factor, but never
    # exceed what the resource actually requested
    max_cputime = int(request_cputime)
    scaled_cputime = int(float(configuration.cputime_for_empty_jobs) * sleep_factor)
    if scaled_cputime > max_cputime:
        cputime = max_cputime
        # leave 20% headroom so the sleep finishes within the cputime limit
        sleep_time = int(0.8 * cputime)
    else:
        cputime = scaled_cputime
        sleep_time = int(float(configuration.sleep_period_for_empty_jobs) * sleep_factor)
    logger.info(
        "request_cputime: %d, sleep_factor: %.1f, cputime: %d, sleep time: %d",
        max_cputime,
        sleep_factor,
        cputime,
        sleep_time,
    )
    job_id = configuration.empty_job_name + "." + unique_resource_name + "." + exe + "." + localjobname
    job_dict["JOB_ID"] = job_id

    # sessionid = configuration.empty_job_name

    # The job simply sleeps to keep the idle exe occupied
    sleep_cmd = "sleep " + str(sleep_time)
    job_dict["EXECUTE"] = [sleep_cmd]
    job_dict["INPUTFILES"] = []
    job_dict["OUTPUTFILES"] = ""
    job_dict["ARGUMENTS"] = ""
    job_dict["EXECUTABLES"] = ""
    job_dict["MOUNT"] = []
    job_dict["CPUTIME"] = str(cputime)
    job_dict["MEMORY"] = 16
    job_dict["DISK"] = 1
    job_dict["EXECUTION_DELAY"] = str(execution_delay)
    job_dict["ENVIRONMENT"] = ""
    job_dict["RUNTIMEENVIRONMENT"] = []
    job_dict["MAXPRICE"] = "0"
    job_dict["JOBNAME"] = "empty job"
    client_id = configuration.empty_job_name
    job_dict["USER_CERT"] = client_id

    # create mRSL file only containing the unique_resource_name.
    # This is used when the .status file from the empty job is
    # uploaded, to find the unique name of the resource to be able
    # to start the exe again if continuous is True
    #

    if not os.path.isfile(helper_dict_filename):
        helper_dict = {}
        helper_dict["JOB_ID"] = job_id
        helper_dict["UNIQUE_RESOURCE_NAME"] = unique_resource_name
        helper_dict["EXE"] = exe
        helper_dict["IS_EMPTY_JOB_HELPER_DICT"] = True
        helper_dict["LOCALJOBNAME"] = localjobname
        pickle(helper_dict, helper_dict_filename, logger)
    return (job_dict, "OK")
def save_schedule_cache(cache, path, logger):
    """Save schedule cache to path for quick loading later"""
    # Thin wrapper around the shared pickle helper
    result = io.pickle(cache, path, logger)
    return result
logger.error('Could not remove link %s: %s' % (link, err)) job_dict['STATUS'] = status job_dict[ status + '_TIMESTAMP' ] = timestamp if not status == 'FINISHED': # Generate execution history if not job_dict.has_key('EXECUTION_HISTORY'): job_dict['EXECUTION_HISTORY'] = [] history_dict = { 'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'], 'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'], status + '_TIMESTAMP': timestamp, status + '_MESSAGE': msg, 'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'], } job_dict['EXECUTION_HISTORY'].append(history_dict) # save into mrsl mrsl_file = os.path.join(configuration.mrsl_files_dir, client_dir, job_dict['JOB_ID'] + '.mRSL') io.pickle(job_dict, mrsl_file, logger) return
os.remove(link) except Exception, err: logger.error('Could not remove link %s: %s' % (link, err)) job_dict['STATUS'] = status job_dict[status + '_TIMESTAMP'] = timestamp if not status == 'FINISHED': # Generate execution history if not job_dict.has_key('EXECUTION_HISTORY'): job_dict['EXECUTION_HISTORY'] = [] history_dict = { 'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'], 'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'], status + '_TIMESTAMP': timestamp, status + '_MESSAGE': msg, 'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'], } job_dict['EXECUTION_HISTORY'].append(history_dict) # save into mrsl mrsl_file = os.path.join(configuration.mrsl_files_dir, client_dir, job_dict['JOB_ID'] + '.mRSL') io.pickle(job_dict, mrsl_file, logger) return
new_dict[key] = value_dict['Value'] new_dict['CREATOR'] = client_id new_dict['CREATED_TIMESTAMP'] = datetime.datetime.now() # Create settings dir for any old users try: settings_dir = os.path.join(configuration.user_settings, client_dir) os.mkdir(settings_dir) except: pass pickle_filename = os.path.join(configuration.user_settings, client_dir, destination) if not pickle(new_dict, pickle_filename, configuration.logger): msg = 'Error saving pickled data!' return (False, msg) # everything ok return (True, '') def parse_and_save_settings(filename, client_id, configuration): """Validate and write settings entries from filename""" status = parse_and_save_pickle(filename, settings_filename, get_settings_fields(), client_id, configuration, True, True) if status[0]: mark_user_modified(configuration, client_id)
def create_empty_job(
    unique_resource_name,
    exe,
    request_cputime,
    sleep_factor,
    localjobname,
    execution_delay,
    configuration,
    logger,
):
    """Helper to create empty job for idle resources"""

    job_dict = {'': ''}
    helper_dict_filename = os.path.join(configuration.resource_home,
                                        unique_resource_name,
                                        'empty_job_helper_dict.%s' % exe)

    # Scale the configured empty-job cputime by sleep_factor, but never
    # exceed what the resource actually requested
    max_cputime = int(request_cputime)
    scaled_cputime = int(float(configuration.cputime_for_empty_jobs)
                         * sleep_factor)
    if scaled_cputime > max_cputime:
        cputime = max_cputime
        # leave 20% headroom so the sleep finishes within the cputime limit
        sleep_time = int(0.8 * cputime)
    else:
        cputime = scaled_cputime
        sleep_time = int(float(configuration.sleep_period_for_empty_jobs)
                         * sleep_factor)
    logger.info(
        'request_cputime: %d, sleep_factor: %.1f, cputime: %d, sleep time: %d',
        max_cputime, sleep_factor, cputime, sleep_time)
    job_id = configuration.empty_job_name + '.' + unique_resource_name + \
        '.' + exe + '.' + localjobname
    job_dict['JOB_ID'] = job_id

    # sessionid = configuration.empty_job_name

    # The job simply sleeps to keep the idle exe occupied
    sleep_cmd = 'sleep ' + str(sleep_time)
    job_dict['EXECUTE'] = [sleep_cmd]
    job_dict['INPUTFILES'] = []
    job_dict['OUTPUTFILES'] = ''
    job_dict['ARGUMENTS'] = ''
    job_dict['EXECUTABLES'] = ''
    job_dict['MOUNT'] = []
    job_dict['CPUTIME'] = str(cputime)
    job_dict['MEMORY'] = 16
    job_dict['DISK'] = 1
    job_dict['EXECUTION_DELAY'] = str(execution_delay)
    job_dict['ENVIRONMENT'] = ''
    job_dict['RUNTIMEENVIRONMENT'] = []
    job_dict['MAXPRICE'] = '0'
    job_dict['JOBNAME'] = 'empty job'
    client_id = configuration.empty_job_name
    job_dict['USER_CERT'] = client_id

    # create mRSL file only containing the unique_resource_name.
    # This is used when the .status file from the empty job is
    # uploaded, to find the unique name of the resource to be able
    # to start the exe again if continuous is True
    #

    if not os.path.isfile(helper_dict_filename):
        helper_dict = {}
        helper_dict['JOB_ID'] = job_id
        helper_dict['UNIQUE_RESOURCE_NAME'] = unique_resource_name
        helper_dict['EXE'] = exe
        helper_dict['IS_EMPTY_JOB_HELPER_DICT'] = True
        helper_dict['LOCALJOBNAME'] = localjobname
        pickle(helper_dict, helper_dict_filename, logger)
    return (job_dict, 'OK')
except arc.ARCWrapperError, err: return (False, err.what()) except arc.NoProxyError, err: return (False, 'No Proxy found: %s' % err.what()) except Exception, err: return (False, err.__str__()) # save file if outfile == 'AUTOMATIC': filename = \ os.path.abspath(os.path.join(configuration.mrsl_files_dir, client_dir, job_id + '.mRSL')) else: filename = outfile if not pickle(global_dict, filename, logger): return (False, 'Fatal error: Could not write %s' % filename) if not outfile == 'AUTOMATIC': # an outfile was specified, so this is just for testing - dont tell # grid_script return (True, '') # tell 'grid_script' message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id) if not send_message_to_grid_script(message, logger, configuration): return (False, '''Fatal error: Could not get exclusive access or write
def set_user_display_active(
    client_id,
    display_number,
    vnc_port,
    password,
    configuration,
    logger,
):
    """Register display_number as actively used by client_id in the shared
    pickled display dict, storing the vnc_port and password alongside.

    Fails if the display is already held by another user or if client_id
    already has a different display registered. Returns (status, msg).
    """
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')
    # Look up current owner (if any) of the requested display
    (dis_ret, dis_dict) = get_dict_from_display_number(display_number,
                                                      configuration, logger)
    if not dis_ret:
        return (False, 'dict error, %s' % dis_dict)
    # NOTE(review): -1 appears to mean "display not registered"; confirm
    # against get_dict_from_display_number
    if dis_dict != -1:
        if dis_dict['client_id'] != client_id:
            # display occupied by another user!
            return (False, 'display %s already in use by another user!'
                    % display_number)

    # getting here means display is free or used by client_id

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)
    current_display = get_users_display_number(client_id, configuration,
                                               logger)
    if not current_display:
        # register display
        dict[display_number] = {'client_id': client_id,
                                'vnc_port': vnc_port,
                                'password': password}
        pickle_status = pickle(dict, filename, logger)
        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                    % (filename, dict[display_number]))
        logger.info('successfuly registered that display %s is in use by %s in %s'
                    % (display_number, client_id, filename))
        return (True, '')
    if current_display != display_number and current_display != -1:
        # problems.. user already holds a different display
        return (False,
                'set_user_display_active met a conflict, can not set display %s when user already has %s registered'
                % (display_number, current_display))
    else:
        # add display to dict (re-registration of the same display)
        dict[display_number] = {'client_id': client_id,
                                'vnc_port': vnc_port,
                                'password': password}
        pickle_status = pickle(dict, filename, logger)
        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                    % (filename, dict[display_number]))
        logger.info('successfuly registered that display %s is in use by %s in %s %s'
                    % (display_number, client_id, dict, filename))
        return (True, '')
def requeue_job(
    job_dict,
    failed_msg,
    job_queue,
    executing_queue,
    configuration,
    logger,
):
    """Requeue a failed job by moving it from executing_queue to job_queue"""

    if not job_dict:
        # NOTE(review): msg with an unfilled %s placeholder is assigned but
        # never used here - only failed_msg is reported
        msg = 'requeue_job: %s is no longer in executing queue'
        print failed_msg
        logger.info(failed_msg)
    else:
        executing_queue.dequeue_job_by_id(job_dict['JOB_ID'])
        failed_timestamp = time.gmtime()

        # Clean up the server for files assosiated with the executing job

        if not job_dict.has_key('SESSIONID')\
                or not job_dict.has_key('IOSESSIONID')\
                or not server_cleanup(
            job_dict['SESSIONID'],
            job_dict['IOSESSIONID'],
            job_dict['LOCALJOBNAME'],
            job_dict['JOB_ID'],
            configuration,
            logger,
        ):
            logger.error('could not clean up MiG server')
            print 'CLEAN UP FAILED'

        client_dir = client_id_dir(job_dict['USER_CERT'])

        # Remove job result files, if they have arrived as the result is not valid
        # This can happen with sandboxes as they can't be stopped serverside

        status_prefix = os.path.join(configuration.user_home, client_dir,
                                     job_dict['JOB_ID'])
        io.delete_file(status_prefix + '.status', logger)
        io.delete_file(status_prefix + '.stdout', logger)
        io.delete_file(status_prefix + '.stderr', logger)

        # Generate execution history

        if not job_dict.has_key('EXECUTION_HISTORY'):
            job_dict['EXECUTION_HISTORY'] = []

        history_dict = {
            'QUEUED_TIMESTAMP': job_dict['QUEUED_TIMESTAMP'],
            'EXECUTING_TIMESTAMP': job_dict['EXECUTING_TIMESTAMP'],
            'FAILED_TIMESTAMP': failed_timestamp,
            'FAILED_MESSAGE': failed_msg,
            'UNIQUE_RESOURCE_NAME': job_dict['UNIQUE_RESOURCE_NAME'],
            'RESOURCE_VGRID': job_dict.get('RESOURCE_VGRID', ''),
            'PUBLICNAME': job_dict.get('PUBLICNAME', 'HIDDEN'),
        }

        job_dict['EXECUTION_HISTORY'].append(history_dict)

        # Retry if retries left

        job_dict['RETRY_COUNT'] = job_dict.get('RETRY_COUNT', 0) + 1

        unique_resource_name = job_dict['UNIQUE_RESOURCE_NAME']

        mrsl_file = os.path.join(configuration.mrsl_files_dir,
                                 client_dir, job_dict['JOB_ID']
                                 + '.mRSL')
        job_retries = job_dict.get('RETRIES', configuration.job_retries)
        if job_dict['RETRY_COUNT'] <= job_retries:
            job_dict['STATUS'] = 'QUEUED'
            job_dict['QUEUED_TIMESTAMP'] = time.gmtime()

            # Strip all execution-specific fields before requeueing so the
            # job looks freshly queued again

            del job_dict['EXECUTING_TIMESTAMP']
            del job_dict['UNIQUE_RESOURCE_NAME']
            del job_dict['EXE']
            del job_dict['RESOURCE_CONFIG']
            del job_dict['LOCALJOBNAME']
            if job_dict.has_key('SESSIONID'):
                del job_dict['SESSIONID']
            if job_dict.has_key('IOSESSIONID'):
                del job_dict['IOSESSIONID']
            if job_dict.has_key('PUBLICNAME'):
                del job_dict['PUBLICNAME']
            if job_dict.has_key('RESOURCE_VGRID'):
                del job_dict['RESOURCE_VGRID']

            io.pickle(job_dict, mrsl_file, logger)

            # Requeue job last in queue for retry later

            job_queue.enqueue_job(job_dict, job_queue.queue_length())
            msg = \
                '%s failed to execute job %s - requeue for retry %d of %d'\
                % (unique_resource_name, job_dict['JOB_ID'],
                   job_dict['RETRY_COUNT'], job_retries)
            print msg
            logger.info(msg)
        else:
            job_dict['STATUS'] = 'FAILED'
            job_dict['FAILED_TIMESTAMP'] = failed_timestamp
            io.pickle(job_dict, mrsl_file, logger)

            # tell the user the sad news

            msg = 'Gave up on executing job %s after %d retries'\
                % (job_dict['JOB_ID'], job_retries)
            logger.error(msg)
            print msg
            notify_user_thread(
                job_dict,
                configuration.myfiles_py_location,
                'FAILED',
                logger,
                False,
                configuration,
            )
dict = unpickle(filename, logger) if dict == False: return (False, 'could not unpickle %s' % filename) if not dict.has_key(display_number): return (False, 'display %s not found in dict' % display_number) try: del dict[display_number] except Exception, e: return ( False, 'exception trying to remove %s from display dict. Exception %s' % (display_number, e)) pickle_status = pickle(dict, filename, logger) if not pickle_status: return (False, 'could not pickle %s when removing %s' % (filename, display_number)) return (True, '') def get_dict_from_display_number(display_number, configuration, logger): (init_ret, filename) = \ initialize_and_get_display_dict_filename(configuration, logger) if not init_ret: return (False, 'could not initialize') dict = unpickle(filename, logger) if dict == False:
def main(client_id, user_arguments_dict): """Main function used by front end""" (configuration, logger, output_objects, op_name) = \ initialize_main_variables(client_id, op_header=False) client_dir = client_id_dir(client_id) defaults = signature()[1] (validate_status, accepted) = validate_input_and_cert( user_arguments_dict, defaults, output_objects, client_id, configuration, allow_rejects=False, ) if not validate_status: return (accepted, returnvalues.CLIENT_ERROR) job_ids = accepted['job_id'] action = accepted['action'][-1] src = accepted['src'] dst = accepted['dst'][-1] title_entry = find_entry(output_objects, 'title') title_entry['text'] = '%s live I/O' % configuration.short_title output_objects.append({'object_type': 'header', 'text' : 'Request live communication with jobs'}) if not action in valid_actions: output_objects.append({'object_type': 'error_text', 'text' : 'Invalid action "%s" (supported: %s)' % \ (action, ', '.join(valid_actions))}) return (output_objects, returnvalues.CLIENT_ERROR) if action in post_actions and not correct_handler('POST'): output_objects.append( {'object_type': 'error_text', 'text' : 'Only accepting POST requests to prevent unintended updates'}) return (output_objects, returnvalues.CLIENT_ERROR) if not job_ids or action in interactive_actions: job_id = '' if job_ids: job_id = job_ids[-1] output_objects.append({'object_type': 'text', 'text' : ''' Fill in the live I/O details below to request communication with a running job. Job ID can be a full ID or a wild card pattern using "*" and "?" to match one or more of your job IDs. Use send output without source and destination paths to request upload of the default stdio files from the job on the resource to the associated job_output directory in your MiG home. Destination is a always handled as a directory path to put source files into. Source and destination paths are always taken relative to the job execution directory on the resource and your MiG home respectively. 
'''}) html = ''' <table class="liveio"> <tr> <td> <form method="post" action="liveio.py"> <table class="liveio"> <tr><td class=centertext> </td></tr> <tr><td> Action:<br /> <input type=radio name=action checked value="send" />send output <input type=radio name=action value="get" />get input </td></tr> <tr><td> Job ID:<br /> <input type=text size=60 name=job_id value="%s" /> </td></tr> <tr><td> Source path(s):<br /> <div id="srcfields"> <input type=text size=60 name=src value="" /><br /> </div> </td></tr> <tr><td> Destination path:<br /> <input type=text size=60 name=dst value="" /> </td></tr> <tr><td> <input type="submit" value="Send request" /> </td></tr> </table> </form> </td> <td> <script type="text/javascript"> fields = 1; max_fields = 64; function addInput() { if (fields < max_fields) { document.getElementById("srcfields").innerHTML += "<input type=text size=60 name=src value='' /><br />"; fields += 1; } else { alert("Maximum " + max_fields + " source fields allowed!"); document.form.add.disabled=true; } } </script> <form name="addsrcform"> <input type="button" onclick="addInput(); return false;" name="add" value="Add another source field" /> </form> </td> </tr> </table> ''' % job_id output_objects.append({'object_type': 'html_form', 'text' : html}) output_objects.append({'object_type': 'text', 'text': ''' Further live job control is avalable through your personal message queues. They provide a basic interface for centrally storing messages under your grid account and can be used to pass messages between jobs or for orchestrating jobs before and during execution. 
''' }) output_objects.append({'object_type': 'link', 'destination': 'mqueue.py', 'text': 'Message queue interface'}) return (output_objects, returnvalues.OK) elif action in ['get', 'receive', 'input']: action = 'get' action_desc = 'will be downloaded to the job on the resource' elif action in ['put', 'send', 'output']: action = 'send' action_desc = 'will be uploaded from the job on the resource' else: output_objects.append({'object_type': 'error_text', 'text' : 'Invalid live io action: %s' % action}) return (output_objects, returnvalues.CLIENT_ERROR) output_objects.append({'object_type': 'text', 'text' : 'Requesting live I/O for %s' % ', '.join(job_ids)}) if action == 'get' and (not src or not dst): output_objects.append( {'object_type': 'error_text', 'text': 'src and dst parameters required for live input'}) return (output_objects, returnvalues.CLIENT_ERROR) # Automatic fall back to stdio files if output with no path provided if src: src_text = 'The files ' + ' '.join(src) else: src_text = 'The job stdio files' if dst: dst_text = 'the ' + dst + ' directory' else: dst_text = 'the corresponding job_output directory' # Please note that base_dir must end in slash to avoid access to other # user dirs when own name is a prefix of another user name base_dir = \ os.path.abspath(os.path.join(configuration.mrsl_files_dir, client_dir)) + os.sep filelist = [] for job_id in job_ids: job_id = job_id.strip() # is job currently being executed? 
# Backward compatibility - all_jobs keyword should match all jobs if job_id == all_jobs: job_id = '*' # Check directory traversal attempts before actual handling to avoid # leaking information about file system layout while allowing # consistent error messages unfiltered_match = glob.glob(base_dir + job_id + '.mRSL') match = [] for server_path in unfiltered_match: real_path = os.path.abspath(server_path) if not valid_user_path(real_path, base_dir, True): # out of bounds - save user warning for later to allow # partial match: # ../*/* is technically allowed to match own files. logger.warning("%s tried to %s restricted path %s ! (%s)" % \ (client_id, op_name, real_path, job_id)) continue # Insert valid job files in filelist for later treatment match.append(real_path) # Now actually treat list of allowed matchings and notify if no # (allowed) match.... if not match: output_objects.append( {'object_type': 'error_text', 'text' : '%s: You do not have any matching job IDs!' % job_id}) else: filelist += match for filepath in filelist: # Extract jo_id from filepath (replace doesn't modify filepath) mrsl_file = filepath.replace(base_dir, '') job_id = mrsl_file.replace('.mRSL', '') job_dict = unpickle(filepath, logger) if not job_dict: status = returnvalues.CLIENT_ERROR output_objects.append( {'object_type': 'error_text', 'text' : ('You can only list status of your own jobs. ' 'Please verify that you submitted the mRSL file ' 'with job id "%s" (Could not unpickle mRSL file %s)' ) % (job_id, filepath)}) continue if job_dict['STATUS'] != 'EXECUTING': output_objects.append( {'object_type': 'text', 'text' : 'Job %s is not currently being executed! Job status: %s' % (job_id, job_dict['STATUS'])}) continue if job_dict['UNIQUE_RESOURCE_NAME'] == 'ARC': output_objects.append( {'object_type': 'text', 'text' : 'Job %s is submitted to ARC, details are not available!' 
% job_id }) continue last_live_update_dict = {} last_live_update_file = configuration.mig_system_files + os.sep\ + job_id + '.last_live_update' if os.path.isfile(last_live_update_file): last_live_update_dict_unpickled = \ unpickle(last_live_update_file, logger) if not last_live_update_dict_unpickled: output_objects.append({'object_type': 'error_text', 'text' : 'Could not unpickle %s - skipping request!' % last_live_update_file}) continue if not last_live_update_dict_unpickled.has_key( 'LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'): output_objects.append( {'object_type': 'error_text', 'text': 'Could not find needed key in %s.' % last_live_update_file}) continue last_live_update_request = \ last_live_update_dict_unpickled['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP' ] difference = datetime.datetime.now()- last_live_update_request try: min_delay = \ int(configuration.min_seconds_between_live_update_requests) except: min_delay = 30 if difference.seconds < min_delay: output_objects.append( {'object_type': 'error_text', 'text': ('Request not allowed, you must wait at least ' \ '%s seconds between live update requests!' ) % min_delay}) continue # save this request to file to avoid DoS from a client request loop. 
last_live_update_dict['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'] = \ datetime.datetime.now() pickle_ret = pickle(last_live_update_dict, last_live_update_file, logger) if not pickle_ret: output_objects.append( {'object_type': 'error_text', 'text' : 'Error saving live io request timestamp to last_live_update ' 'file, request not sent!'}) continue # # # ## job is being executed right now, send live io request to frontend # # # get resource_config, needed by scp_file_to_resource #(status, resource_config) = get_resource_configuration( # resource_home, unique_resource_name, logger) resource_config = job_dict['RESOURCE_CONFIG'] (status, exe) = get_resource_exe(resource_config, job_dict['EXE'], logger) if not status: output_objects.append( {'object_type': 'error_text', 'text' : 'Could not get exe configuration for job %s' % job_id}) continue local_file = '%s.%supdate' % (job_dict['LOCALJOBNAME'], action) if not os.path.exists(local_file): # create try: filehandle = open(local_file, 'w') filehandle.write('job_id ' + job_dict['JOB_ID'] + '\n') filehandle.write('localjobname ' + job_dict['LOCALJOBNAME'] + '\n') filehandle.write('execution_user ' + exe['execution_user'] + '\n') filehandle.write('execution_node ' + exe['execution_node'] + '\n') filehandle.write('execution_dir ' + exe['execution_dir'] + '\n') filehandle.write('target liveio\n') # Leave defaults src and dst to FE script if not provided if src: filehandle.write('source ' + ' '.join(src) + '\n') if dst: filehandle.write('destination ' + dst + '\n') # Backward compatible test for shared_fs - fall back to scp if exe.has_key('shared_fs') and exe['shared_fs']: filehandle.write('copy_command cp\n') filehandle.write('copy_frontend_prefix \n') filehandle.write('copy_execution_prefix \n') else: filehandle.write('copy_command scp -B\n') filehandle.write('copy_frontend_prefix ${frontend_user}@${frontend_node}:\n' ) filehandle.write('copy_execution_prefix ${execution_user}@${execution_node}:\n' ) filehandle.write('### END OF 
SCRIPT ###\n') filehandle.close() except Exception, exc: pass if not os.path.exists(local_file): output_objects.append( {'object_type': 'error_text', 'text' : '.%supdate file not available on %s server' % \ (action, configuration.short_title)}) continue scpstatus = copy_file_to_resource(local_file, '%s.%supdate' % (job_dict['LOCALJOBNAME'], action), resource_config, logger) if not scpstatus: output_objects.append( {'object_type': 'error_text', 'text' : 'Error sending request for live io to resource!'}) continue else: output_objects.append( {'object_type': 'text', 'text' : 'Request for live io was successfully sent to the resource!' }) output_objects.append( {'object_type': 'text', 'text' : '%s %s and should become available in %s in a minute.' % \ (src_text, action_desc, dst_text) }) if action == 'send': if not dst: target_path = '%s/%s/*' % (job_output_dir, job_id) else: target_path = dst output_objects.append({'object_type': 'link', 'destination' : 'ls.py?path=%s' % target_path, 'text': 'View uploaded files'}) else: output_objects.append({'object_type': 'link', 'destination' : 'ls.py?path=%s' % ';path='.join(src), 'text': 'View files for download'}) try: os.remove(local_file) except Exception, exc: pass
return (False, err.what()) except arc.NoProxyError, err: return (False, 'No Proxy found: %s' % err.what()) except Exception, err: return (False, err.__str__()) # save file if outfile == 'AUTOMATIC': filename = \ os.path.abspath(os.path.join(configuration.mrsl_files_dir, client_dir, job_id + '.mRSL')) else: filename = outfile if not pickle(global_dict, filename, logger): return (False, 'Fatal error: Could not write %s' % filename) if not outfile == 'AUTOMATIC': # an outfile was specified, so this is just for testing - dont tell # grid_script return (True, '') # tell 'grid_script' message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id) if not send_message_to_grid_script(message, logger, configuration): return (False, '''Fatal error: Could not get exclusive access or write
def main(client_id, user_arguments_dict): """Main function used by front end""" (configuration, logger, output_objects, op_name) = \ initialize_main_variables(client_id, op_header=False) client_dir = client_id_dir(client_id) defaults = signature()[1] (validate_status, accepted) = validate_input_and_cert( user_arguments_dict, defaults, output_objects, client_id, configuration, allow_rejects=False, ) if not validate_status: return (accepted, returnvalues.CLIENT_ERROR) logger.debug("User: %s executing %s" % (client_id, op_name)) if not configuration.site_enable_jupyter: output_objects.append({ 'object_type': 'error_text', 'text': 'The Jupyter service is not enabled on the system' }) return (output_objects, returnvalues.SYSTEM_ERROR) if not configuration.site_enable_sftp_subsys and not \ configuration.site_enable_sftp: output_objects.append({ 'object_type': 'error_text', 'text': 'The required sftp service is not enabled on the system' }) return (output_objects, returnvalues.SYSTEM_ERROR) if configuration.site_enable_sftp: sftp_port = configuration.user_sftp_port if configuration.site_enable_sftp_subsys: sftp_port = configuration.user_sftp_subsys_port requested_service = accepted['service'][-1] service = { k: v for options in configuration.jupyter_services for k, v in options.items() if options['service_name'] == requested_service } if not service: valid_services = [ options['name'] for options in configuration.jupyter_services ] output_objects.append({ 'object_type': 'error_text', 'text': '%s is not a valid jupyter service, ' 'allowed include %s' % (requested_service, valid_services) }) return (output_objects, returnvalues.SYSTEM_ERROR) valid_service = valid_jupyter_service(configuration, service) if not valid_service: output_objects.append({ 'object_type': 'error_text', 'text': 'The service %s appears to be misconfigured, ' 'please contact a system administrator about this issue' % requested_service }) return (output_objects, returnvalues.SYSTEM_ERROR) host = 
get_host_from_service(configuration, service) # Get an active jupyterhost if host is None: logger.error("No active jupyterhub host could be found") output_objects.append({ 'object_type': 'error_text', 'text': 'Failed to establish connection to the %s Jupyter service' % service['service_name'] }) output_objects.append({ 'object_type': 'link', 'destination': 'jupyter.py', 'text': 'Back to Jupyter services overview' }) return (output_objects, returnvalues.SYSTEM_ERROR) remote_user = unescape(os.environ.get('REMOTE_USER', '')).strip() if not remote_user: logger.error("Can't connect to jupyter with an empty REMOTE_USER " "environment variable") output_objects.append({ 'object_type': 'error_text', 'text': 'Failed to establish connection to the Jupyter service' }) return (output_objects, returnvalues.CLIENT_ERROR) # Ensure the remote_user dict can be http posted remote_user = str(remote_user) # TODO, activate admin info # remote_user = {'USER': username, 'IS_ADMIN': is_admin(client_id, # configuration, # logger)} # Regular sftp path mnt_path = os.path.join(configuration.jupyter_mount_files_dir, client_dir) # Subsys sftp path subsys_path = os.path.join(configuration.mig_system_files, 'jupyter_mount') # sftp session path link_home = configuration.sessid_to_jupyter_mount_link_home user_home_dir = os.path.join(configuration.user_home, client_dir) # Preparing prerequisites if not os.path.exists(mnt_path): os.makedirs(mnt_path) if not os.path.exists(link_home): os.makedirs(link_home) if configuration.site_enable_sftp_subsys: if not os.path.exists(subsys_path): os.makedirs(subsys_path) # Make sure ssh daemon does not complain tighten_key_perms(configuration, client_id) url_base = '/' + service['service_name'] url_home = url_base + '/home' url_auth = host + url_base + '/hub/login' url_data = host + url_base + '/hub/user-data' # Does the client home dir contain an active mount key # If so just keep on using it. 
jupyter_mount_files = [ os.path.join(mnt_path, jfile) for jfile in os.listdir(mnt_path) if jfile.endswith('.jupyter_mount') ] logger.info("User: %s mount files: %s" % (client_id, "\n".join(jupyter_mount_files))) logger.debug("Remote-User %s" % remote_user) active_mounts = [] for jfile in jupyter_mount_files: jupyter_dict = unpickle(jfile, logger) if not jupyter_dict: # Remove failed unpickle logger.error("Failed to unpickle %s removing it" % jfile) remove_jupyter_mount(jfile, configuration) else: # Mount has been timed out if not is_active(jupyter_dict): remove_jupyter_mount(jfile, configuration) else: # Valid mount active_mounts.append({'path': jfile, 'state': jupyter_dict}) logger.debug( "User: %s active keys: %s" % (client_id, "\n".join([mount['path'] for mount in active_mounts]))) # If multiple are active, remove oldest active_mount, old_mounts = get_newest_mount(active_mounts) for mount in old_mounts: remove_jupyter_mount(mount['path'], configuration) # A valid active key is already present redirect straight to the jupyter # service, pass most recent mount information if active_mount is not None: mount_dict = mig_to_mount_adapt(active_mount['state']) user_dict = mig_to_user_adapt(active_mount['state']) logger.debug("Existing header values, Mount: %s User: %s" % (mount_dict, user_dict)) auth_header = {'Remote-User': remote_user} json_data = {'data': {'Mount': mount_dict, 'User': user_dict}} if configuration.site_enable_workflows: workflows_dict = mig_to_workflows_adapt(active_mount['state']) if not workflows_dict: # No cached workflows session could be found -> refresh with a # one workflow_session_id = get_workflow_session_id( configuration, client_id) if not workflow_session_id: workflow_session_id = create_workflow_session_id( configuration, client_id) # TODO get this dynamically url = configuration.migserver_https_sid_url + \ '/cgi-sid/workflowsjsoninterface.py?output_format=json' workflows_dict = { 'WORKFLOWS_URL': url, 'WORKFLOWS_SESSION_ID': 
workflow_session_id } logger.debug("Existing header values, Workflows: %s" % workflows_dict) json_data['workflows_data'] = {'Session': workflows_dict} with requests.session() as session: # Authenticate and submit data response = session.post(url_auth, headers=auth_header) if response.status_code == 200: response = session.post(url_data, json=json_data) if response.status_code != 200: logger.error( "Jupyter: User %s failed to submit data %s to %s" % (client_id, json_data, url_data)) else: logger.error( "Jupyter: User %s failed to authenticate against %s" % (client_id, url_auth)) # Redirect client to jupyterhub return jupyter_host(configuration, output_objects, remote_user, url_home) # Create a new keyset # Create login session id session_id = generate_random_ascii(2 * session_id_bytes, charset='0123456789abcdef') # Generate private/public keys (mount_private_key, mount_public_key) = generate_ssh_rsa_key_pair(encode_utf8=True) # Known hosts sftp_addresses = socket.gethostbyname_ex( configuration.user_sftp_show_address or socket.getfqdn()) # Subsys sftp support if configuration.site_enable_sftp_subsys: # Restrict possible mount agent auth_content = [] restrict_opts = 'no-agent-forwarding,no-port-forwarding,no-pty,' restrict_opts += 'no-user-rc,no-X11-forwarding' restrictions = '%s' % restrict_opts auth_content.append('%s %s\n' % (restrictions, mount_public_key)) # Write auth file write_file('\n'.join(auth_content), os.path.join(subsys_path, session_id + '.authorized_keys'), logger, umask=027) logger.debug("User: %s - Creating a new jupyter mount keyset - " "private_key: %s public_key: %s " % (client_id, mount_private_key, mount_public_key)) jupyter_dict = { 'MOUNT_HOST': configuration.short_title, 'SESSIONID': session_id, 'USER_CERT': client_id, # don't need fraction precision, also not all systems provide fraction # precision. 
'CREATED_TIMESTAMP': int(time.time()), 'MOUNTSSHPRIVATEKEY': mount_private_key, 'MOUNTSSHPUBLICKEY': mount_public_key, # Used by the jupyterhub to know which host to mount against 'TARGET_MOUNT_ADDR': "@" + sftp_addresses[0] + ":", 'PORT': sftp_port } client_email = extract_field(client_id, 'email') if client_email: jupyter_dict.update({'USER_EMAIL': client_email}) if configuration.site_enable_workflows: workflow_session_id = get_workflow_session_id(configuration, client_id) if not workflow_session_id: workflow_session_id = create_workflow_session_id( configuration, client_id) # TODO get this dynamically url = configuration.migserver_https_sid_url + \ '/cgi-sid/workflowsjsoninterface.py?output_format=json' jupyter_dict.update({ 'WORKFLOWS_URL': url, 'WORKFLOWS_SESSION_ID': workflow_session_id }) # Only post the required keys, adapt to API expectations mount_dict = mig_to_mount_adapt(jupyter_dict) user_dict = mig_to_user_adapt(jupyter_dict) workflows_dict = mig_to_workflows_adapt(jupyter_dict) logger.debug("User: %s Mount header: %s" % (client_id, mount_dict)) logger.debug("User: %s User header: %s" % (client_id, user_dict)) if workflows_dict: logger.debug("User: %s Workflows header: %s" % (client_id, workflows_dict)) # Auth and pass a new set of valid mount keys auth_header = {'Remote-User': remote_user} json_data = {'data': {'Mount': mount_dict, 'User': user_dict}} if workflows_dict: json_data['workflows_data'] = {'Session': workflows_dict} # First login with requests.session() as session: # Authenticate response = session.post(url_auth, headers=auth_header) if response.status_code == 200: response = session.post(url_data, json=json_data) if response.status_code != 200: logger.error( "Jupyter: User %s failed to submit data %s to %s" % (client_id, json_data, url_data)) else: logger.error("Jupyter: User %s failed to authenticate against %s" % (client_id, url_auth)) # Update pickle with the new valid key jupyter_mount_state_path = os.path.join(mnt_path, session_id + 
'.jupyter_mount') pickle(jupyter_dict, jupyter_mount_state_path, logger) # Link jupyter pickle state file linkdest_new_jupyter_mount = os.path.join(mnt_path, session_id + '.jupyter_mount') linkloc_new_jupyter_mount = os.path.join(link_home, session_id + '.jupyter_mount') make_symlink(linkdest_new_jupyter_mount, linkloc_new_jupyter_mount, logger) # Link userhome linkloc_user_home = os.path.join(link_home, session_id) make_symlink(user_home_dir, linkloc_user_home, logger) return jupyter_host(configuration, output_objects, remote_user, url_home)
new_dict[key] = value_dict['Value'] new_dict['CREATOR'] = client_id new_dict['CREATED_TIMESTAMP'] = datetime.datetime.now() # Create settings dir for any old users try: settings_dir = os.path.join(configuration.user_settings, client_dir) os.mkdir(settings_dir) except: pass pickle_filename = os.path.join(configuration.user_settings, client_dir, destination) if not pickle(new_dict, pickle_filename, configuration.logger): msg = 'Error saving pickled data!' return (False, msg) # everything ok return (True, '') def parse_and_save_settings(filename, client_id, configuration): """Validate and write settings entries from filename""" status = parse_and_save_pickle(filename, settings_filename, get_settings_fields(), client_id, configuration, True, True) if status[0]: mark_user_modified(configuration, client_id) return status