Example #1
def get_job_ids_with_specified_project_name(
    client_id,
    project_name,
    mrsl_files_dir,
    logger,
    ):
    """Helper for finding a job with a given project field"""

    client_dir = client_id_dir(client_id)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(os.path.join(mrsl_files_dir, client_dir)) \
         + os.sep

    # this is heavy :-/ we must loop all the mrsl files submitted by the user
    # to find the job ids belonging to the specified project

    matching_job_ids = []
    all_files = os.listdir(base_dir)

    for mrsl_file in all_files:
        job_dict = unpickle(base_dir + os.sep + mrsl_file, logger)
        if not job_dict:
            continue
        if job_dict.has_key('PROJECT'):
            if job_dict['PROJECT'] == project_name:
                matching_job_ids.append(job_dict['JOB_ID'])
    return matching_job_ids
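Every example on this page calls a shared unpickle(path, logger[, allow_missing]) helper, and several also call its pickle(obj, path, logger) counterpart, but the helpers themselves are not shown here. As a rough, assumed sketch (the signatures and the return-False-on-failure convention are inferred from the calls on this page, not copied from the real shared module), they might look like:

import os
import pickle as _stdlib_pickle


def unpickle(path, logger, allow_missing=False):
    """Return the object pickled at path, or False if it cannot be loaded."""
    try:
        with open(path, 'rb') as handle:
            return _stdlib_pickle.load(handle)
    except Exception as exc:
        # with allow_missing set, a missing file is not worth an error entry
        if not (allow_missing and not os.path.exists(path)):
            logger.error('could not unpickle %s: %s' % (path, exc))
        return False


def pickle(obj, path, logger):
    """Pickle obj to path and return True on success, False on failure."""
    try:
        with open(path, 'wb') as handle:
            _stdlib_pickle.dump(obj, handle)
        return True
    except Exception as exc:
        logger.error('could not pickle %s to %s: %s' % (obj, path, exc))
        return False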
Example #2
def get_job_ids_with_specified_project_name(
    client_id,
    project_name,
    mrsl_files_dir,
    logger,
    ):
    """Helper for finding a job with a given project field"""

    client_dir = client_id_dir(client_id)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = os.path.abspath(os.path.join(mrsl_files_dir, client_dir)) \
         + os.sep

    # this is heavy :-/ we must loop all the mrsl files submitted by the user
    # to find the job ids belonging to the specified project

    matching_job_ids = []
    all_files = os.listdir(base_dir)

    for mrsl_file in all_files:
        job_dict = unpickle(base_dir + os.sep + mrsl_file, logger)
        if not job_dict:
            continue
        if job_dict.has_key('PROJECT'):
            if job_dict['PROJECT'] == project_name:
                matching_job_ids.append(job_dict['JOB_ID'])
    return matching_job_ids
Example #3
def update_section_helper(client_id,
                          configuration,
                          section_filename,
                          changes,
                          defaults,
                          create_missing=True):
    """Update settings section in pickled file with values from changes
    dictionary. Optional create_missing can be used if the pickle should be
    created if not already there.
    The defaults dictionary is used to set any missing values.
    """

    client_dir = client_id_dir(client_id)
    section_path = os.path.join(configuration.user_settings, client_dir,
                                section_filename)
    if not os.path.exists(section_path):
        if create_missing:
            section_dict = {}
        else:
            raise Exception('no %s file to update!' % section_filename)
    else:
        section_dict = unpickle(section_path, configuration.logger)
    for (key, val) in defaults.items():
        section_dict[key] = section_dict.get(key, val)
    section_dict.update(changes)
    if not pickle(section_dict, section_path, configuration.logger):
        raise Exception('could not save updated %s file!' % section_filename)
    return section_dict
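A hypothetical call of the helper above (the section name, keys and values are invented for illustration) shows the defaults-then-changes merge order described in the docstring:

changes = {'EMAIL': ['jane@example.org']}
defaults = {'EMAIL': [], 'LANGUAGE': 'English'}
settings = update_section_helper(client_id, configuration, 'settings',
                                 changes, defaults, create_missing=True)
# settings now holds LANGUAGE from defaults plus the new EMAIL value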
Example #4
def list_items_in_pickled_list(path, logger, allow_missing=False):

    # list items

    _list = unpickle(path, logger, allow_missing)
    if _list is False:
        return (False, 'Failure: could not unpickle list')
    return (True, _list)
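As with most helpers on this page, the return value is a (status, payload) tuple, where the payload is the list on success and an error string on failure. A hypothetical caller (owners_path and logger are placeholders) might use it like this:

(loaded, owners) = list_items_in_pickled_list(owners_path, logger)
if not loaded:
    logger.error(owners)
else:
    for owner in owners:
        logger.info('owner: %s' % owner)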
Example #5
def recv_notification(configuration, path):
    """Read notification event from file"""
    logger = configuration.logger
    # logger.debug("read_notification: %s" % file)
    status = True
    new_notification = unpickle(path, logger)
    if not new_notification:
        logger.error("Failed to unpickle: %s" % path)
        return False
    user_id = new_notification.get('user_id', '')
    # logger.debug("Received user_id: '%s'" % user_id)
    if not user_id:
        status = False
        logger.error("Missing user_id in notification: %s" % path)
    else:
        client_id = expand_openid_alias(user_id, configuration)
        # logger.debug("resolved client_id: '%s'" % client_id)
        if not client_id or not extract_field(client_id, 'email'):
            status = False
            logger.error("Failed to resolve client_id from user_id: '%s'" %
                         user_id)
    if status:
        category = new_notification.get('category', [])
        # logger.debug("Received category: %s" % category)
        if not isinstance(category, list):
            status = False
            logger.error("Received category: %s must be a list" % category)
    if status:
        logger.info("Received event: %s, from: '%s'" % (category, client_id))
        new_timestamp = new_notification.get('timestamp')
        message = new_notification.get('message', '')
        # logger.debug("Received message: %s" % message)
        client_dict = received_notifications.get(client_id, {})
        if not client_dict:
            received_notifications[client_id] = client_dict
        files_list = client_dict.get('files', [])
        if not files_list:
            client_dict['files'] = files_list
        if path in files_list:
            logger.warning("Skipping prevoursly received notification: '%s'" %
                           path)
        else:
            files_list.append(path)
            client_dict['timestamp'] = min(
                client_dict.get('timestamp', sys.maxint), new_timestamp)
            messages_dict = client_dict.get('messages', {})
            if not messages_dict:
                client_dict['messages'] = messages_dict
            header = " ".join(category)
            if not header:
                header = '* UNKNOWN *'
            body_dict = messages_dict.get(header, {})
            if not body_dict:
                messages_dict[header] = body_dict
            message_count = body_dict.get(message, 0)
            body_dict[message] = message_count + 1

    return status
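For reference, the module-level received_notifications cache that recv_notification fills could end up with roughly the following shape (the user id, paths, category header and counts are invented for illustration):

received_notifications = {
    'user@example.org': {
        'files': ['/tmp/notify/event-1.pck', '/tmp/notify/event-2.pck'],
        'timestamp': 1589907200,
        'messages': {'JOBS ERROR': {'Job 42 failed': 2}},
    },
}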
Example #6
def list_items_in_pickled_list(path, logger):

    # list items

    list_ = unpickle(path, logger)
    if list_ == []:
        pass
    elif not list_:
        return (False, 'Failure: could not unpickle list')
    return (True, list_)
Example #7
def fill_triggers(configuration, vgrids_dict):
    """Search for system_imagesettings triggers and the needed information,
    such as rule_id, run_as and path to *vgrids_dict*"""

    status = True
    logger = configuration.logger
    logger.info(str(vgrids_dict.keys()))
    for key in vgrids_dict:
        logger.info('----------------------------------------------')
        logger.info('%s' % key)
        logger.info('----------------------------------------------')

        vgrid = vgrids_dict[key]['vgrid']
        vgridpath = vgrids_dict[key]['vgridpath']

        trigger_file = \
            os.path.join(os.path.join(configuration.vgrid_home, vgrid),
                         configuration.vgrid_triggers)

        if not os.path.exists(trigger_file):
            logger.warning("Missing trigger configuration: '%s'" %
                           trigger_file)
        else:
            triggers = unpickle(trigger_file, logger)
            if not isinstance(triggers, list):
                status = False
                logger.error("Couldn't load trigger configuration: '%s'" %
                             trigger_file)
                break

            for trigger in triggers:
                if trigger['rule_id'].startswith('system_imagesettings_'):
                    vgridtriggerpath = get_vgridtriggerpath(
                        vgrid, trigger['path'])
                    if trigger['rule_id'] \
                        == 'system_imagesettings_meta_created' \
                        or trigger['rule_id'] \
                        == 'system_imagesettings_dir_deleted' \
                        or vgridtriggerpath == vgridpath:

                        logger.info("vgrid: '%s'" % vgrid)
                        logger.info("path: '%s'" % vgridpath)
                        logger.info("rule_id: '%s'" % trigger['rule_id'])
                        logger.info("run_as '%s'" % trigger['run_as'])
                        logger.info(
                            '----------------------------------------------')
                        trigger = {
                            'rule_id': trigger['rule_id'],
                            'run_as': trigger['run_as'],
                            'path': vgridpath
                        }
                        vgrids_dict[key]['triggers'].append(trigger)

    return status
Example #8
def load_queue(path, logger):
    """Load job queue from path"""

    # Load and add current logger

    queue = io.unpickle(path, logger)
    if not queue:
        # unpickle not successful
        return None
    else:
        queue.logger = logger
        return queue
Example #9
def get_resource_configuration(resource_home, unique_resource_name, logger):

    # open the configuration file

    resource_config_file = resource_home + "/" + unique_resource_name + "/config"
    resource_config = unpickle(resource_config_file, logger)
    if not resource_config:
        msg = "could not unpickle %s" % resource_config_file
        logger.error(msg)
        return (False, msg)
    else:
        return (True, resource_config)
Example #10
def is_item_in_pickled_list(path, item, logger):
    list_ = unpickle(path, logger)
    if not list_:
        return False

    if len(list_) == 0:
        return False

    if item in list_:
        return True
    else:
        return False
Example #11
def is_item_in_pickled_list(path, item, logger):
    list_ = unpickle(path, logger)
    if not list_:
        return False

    if len(list_) == 0:
        return False

    if item in list_:
        return True
    else:
        return False
Example #12
def load_queue(path, logger):
    """Load job queue from path"""

    # Load and add current logger

    queue = io.unpickle(path, logger)
    if not queue:
        # unpickle not successful
        return None
    else:
        queue.logger = logger
        return queue
Example #13
def main():
    configuration = get_configuration_object()

    # Overwrite default logger

    logger = configuration.logger = get_logger(logging.INFO)

    vgrids_dict = unpickle(TRIGGER_DICT_FILE, logger)

    vgrid_list = get_vgrids_dict(vgrids_dict)
    for name in vgrid_list:
        print name
Example #14
def main():
    status = True
    configuration = get_configuration_object()
    logger = configuration.logger = get_logger(logging.INFO)

    # Overwrite default logger

    argc = len(sys.argv) - 1
    user_vgrid_list = None
    if argc == 1:
        user_vgrid_list = [vgrid.strip() for vgrid in sys.argv[1].split(',')]
        logger.info('Using custom vgrid_list: %s' % user_vgrid_list)

    vgrids_dict = unpickle(TRIGGER_DICT_FILE, logger)
    update_trigger_dict = None

    if vgrids_dict:
        (vgrids_dict, vgrid_list) = filter_vgrids_dict(configuration,
                                                       vgrids_dict,
                                                       user_vgrid_list)
    else:
        status = False
        logger.error("Missing vgrid dict file: '%s'" % TRIGGER_DICT_FILE)

    if status:
        status = backup_trigger_files(configuration, vgrid_list)

    if status:
        status = backup_imagesettings_files(configuration, vgrid_list)

    if status:
        status = backup_paraview_links(configuration, vgrid_list)

    if status:
        update_trigger_dict = \
            get_update_trigger_dict_and_check_for_unique_clientid(configuration,
                vgrids_dict)
        if update_trigger_dict is None:
            status = False

    if status:
        status = remove_triggers(configuration, vgrids_dict)

    if status:
        status = update_backend(configuration, update_trigger_dict)

    if status:
        return 0
    else:
        return 1
Example #15
def get_resource_configuration(resource_home, unique_resource_name, logger):
    """Load a resource configuration from file"""

    # open the configuration file

    resource_config_file = resource_home + '/' + unique_resource_name\
         + '/config'
    resource_config = unpickle(resource_config_file, logger)
    if not resource_config:
        msg = 'could not unpickle %s' % resource_config_file
        logger.error(msg)
        return (False, msg)
    else:
        return (True, resource_config)
Example #16
def get_dict_from_display_number(display_number, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
        print 'dict is %s false' % dict
        return (False, 'could not unpickle %s' % filename)

    if dict.has_key(display_number):
        return (display_number, dict[display_number])
    else:
        return (True, -1)
Example #17
def get_dict_from_display_number(display_number, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
        print 'dict is %s false' % dict
        return (False, 'could not unpickle %s' % filename)

    if dict.has_key(display_number):
        return (display_number, dict[display_number])
    else:
        return (True, -1)
Example #18
def load_section_helper(client_id, configuration, section_filename,
                        section_keys, include_meta=False):
    """Load settings section from pickled file. Optional include_meta
    controls the inclusion of meta data like creator and creation time.
    """

    client_dir = client_id_dir(client_id)
    section_path = os.path.join(configuration.user_settings, client_dir,
                                section_filename)
    section_dict = unpickle(section_path, configuration.logger)
    if section_dict and not include_meta:
        real_keys = section_keys
        for key in section_dict.keys():
            if not key in real_keys:
                del section_dict[key]
    return section_dict
Example #19
    def fill_from_mrsl(self, job_data):
        """Read a pickled mRSL file and fill in the contained data"""

        # job_data is supposed to be a pickled file or a dictionary.
        # add a solid type later!

        self.__logger.debug('filling in job data from file %s' % job_data)
        try:
            if os.path.exists(job_data):
                job = unpickle(job_data, self.__logger)
                self.fill_from_dict(job)
            else:
                self.__logger.error('file %s does not exist.' % job_data)
        except Exception, err:
            self.__logger.error('while filling in data from %s: %s' %
                                (job_data, err))
Example #20
def testresource_has_re_specified(unique_resource_name, re_name,
                                  configuration):
    """Check if unique_resource_name has runtime env enabled"""
    resource_config = unpickle(configuration.resource_home
                                + unique_resource_name + '/config',
                               configuration.logger)
    if not resource_config:
        configuration.logger.error('error unpickling resource config')
        return False

    for rre in resource_config['RUNTIMEENVIRONMENT']:
        (res_name, res_val) = rre
        if re_name == res_name:
            return True

    return False
Example #21
def testresource_has_re_specified(unique_resource_name, re_name,
                                  configuration):
    """Check if unique_resource_name has runtime env enabled"""
    resource_config = unpickle(
        configuration.resource_home + unique_resource_name + '/config',
        configuration.logger)
    if not resource_config:
        configuration.logger.error('error unpickling resource config')
        return False

    for rre in resource_config['RUNTIMEENVIRONMENT']:
        (res_name, res_val) = rre
        if re_name == res_name:
            return True

    return False
Example #22
def remove_item_from_pickled_list(
    path,
    item,
    logger,
    allow_empty_list=True,
    ):

    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:

        # OK, if the list is empty

        pass
    elif not list_:

        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item is in the list

    item = item.strip()
    if not item in list_:
        output += '%s not found in list' % item
        return (False, output)

    if not allow_empty_list:
        if len(list_) <= 1:
            output += 'You cannot remove the last item'
            return (False, output)

    # ok, lets remove the item and pickle and save the new list

    try:
        list_.remove(item)
    except:
        output += \
            'Strange error, %s could not be removed, but it seems to be in the list'\
             % item
        return (False, output)

    status = pickle(list_, path, logger)
    if not status:
        output += 'Error pickling new owners file'
        return (False, output)

    return (True, output)
Example #23
def remove_item_from_pickled_list(
    path,
    item,
    logger,
    allow_empty_list=True,
):

    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:

        # OK, if the list is empty

        pass
    elif not list_:

        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item is in the list

    item = item.strip()
    if not item in list_:
        output += '%s not found in list' % item
        return (False, output)

    if not allow_empty_list:
        if len(list_) <= 1:
            output += 'You cannot remove the last item'
            return (False, output)

    # ok, lets remove the item and pickle and save the new list

    try:
        list_.remove(item)
    except:
        output += \
            'Strange error, %s could not be removed, but it seems to be in the list'\
             % item
        return (False, output)

    status = pickle(list_, path, logger)
    if not status:
        output += 'Error pickling new owners file'
        return (False, output)

    return (True, output)
Example #24
def set_user_display_inactive(
    client_id,
    display_number,
    configuration,
    logger,
):

    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    current_display = get_users_display_number(client_id, configuration,
                                               logger)
    if not current_display:
        return (
            False,
            'could not remove active display since no entry was found for %s' %
            client_id)

    if current_display == -1:
        return (
            False,
            'user %s does not have a display registered, unable to inactivate any display'
            % client_id)

    if current_display != display_number:
        return (
            False,
            'user %s had display %s registered in dict, but specified display_number in set_user_display_inactive was %s'
            % (client_id, current_display, display_number))

    # remove entry from dict and pickle it

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    if not dict.has_key(display_number):
        return (False, 'display %s not found in dict' % display_number)
    try:
        del dict[display_number]
    except Exception, e:
        return (
            False,
            'exception trying to remove %s from display dict. Exception %s' %
            (display_number, e))
Example #25
def get_users_display_dict(client_id, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    for (key, value) in dict.items():
        if value['client_id'] == client_id:
            return (key, value)

    # not found, client_id does not have a live display

    return (-1, -1)
Example #26
def get_users_display_dict(client_id, configuration, logger):
    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    for (key, value) in dict.items():
        if value['client_id'] == client_id:
            return (key, value)

    # not found, client_id does not have a live display

    return (-1, -1)
Example #27
    def fill_from_mrsl(self, job_data):
        """Read a pickled mRSL file and fill in the contained data"""

        # job_data is supposed to be a pickled file or a dictionary.
        # add a solid type later!

        self.__logger.debug('filling in job data from file %s'
                             % job_data)
        try:
            if os.path.exists(job_data):
                job = unpickle(job_data, self.__logger)
                self.fill_from_dict(job)
            else:
                self.__logger.error('file %s does not exist.'
                                     % job_data)
        except Exception, err:
            self.__logger.error('while filling in data from %s: %s'
                                 % (job_data, err))
Example #28
def set_user_display_inactive(
    client_id,
    display_number,
    configuration,
    logger,
    ):

    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    current_display = get_users_display_number(client_id,
            configuration, logger)
    if not current_display:
        return (False,
                'could not remove active display since no entry was found for %s'
                 % client_id)

    if current_display == -1:
        return (False,
                'user %s does not have a display registered, unable to inactivate any display'
                 % client_id)

    if current_display != display_number:
        return (False,
                'user %s had display %s registered in dict, but specified display_number in set_user_display_inactive was %s'
                 % (client_id, current_display, display_number))

    # remove entry from dict and pickle it

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    if not dict.has_key(display_number):
        return (False, 'display %s not found in dict' % display_number)
    try:
        del dict[display_number]
    except Exception, e:
        return (False,
                'exception trying to remove %s from display dict. Exception %s'
                 % (display_number, e))
Example #29
def _load_rate_limits(configuration, proto, do_lock=True):
    """Load rate limits dict"""
    logger = configuration.logger
    rate_limits_filepath = os.path.join(
        configuration.mig_system_run, "%s.%s" % (proto, _rate_limits_filename))
    if do_lock:
        rate_limits_lock = _acquire_rate_limits_lock(configuration,
                                                     proto,
                                                     exclusive=False)
    result = unpickle(rate_limits_filepath, logger)
    if do_lock:
        _release_rate_limits_lock(rate_limits_lock)

    if not isinstance(result, dict):
        logger.warning("failed to retrieve active %s rate limits from %s" %
                       (proto, rate_limits_filepath))
        result = {}

    return result
Example #30
def migrated_job(filename, client_id, configuration):
    """returns a tuple (bool status, str msg)"""

    logger = configuration.logger
    client_dir = client_id_dir(client_id)
    job_path = os.path.abspath(
        os.path.join(configuration.server_home, client_dir, filename))

    # unpickle and enqueue received job file

    job_path_spaces = job_path.replace('\\ ', '\\\\\\ ')
    job = io.unpickle(job_path_spaces, configuration.logger)

    # TODO: update any fields to mark migration?

    if not job:
        return (False,
                'Fatal migration error: loading pickled job (%s) failed! ' % \
                job_path_spaces)

    job_id = job['JOB_ID']

    # save file with other mRSL files

    mrsl_filename = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir, job_id + '.mRSL'))

    if not io.pickle(job, mrsl_filename, configuration.logger):
        return (False, 'Fatal error: Could not write ' + filename)

    # tell 'grid_script'

    message = 'SERVERJOBFILE ' + client_dir + '/' + job_id + '\n'

    if not io.send_message_to_grid_script(message, logger, configuration):
        return (False, 'Fatal error: Could not write to grid stdin')

    # TODO: do we need to wait for grid_script to ack job reception?
    # ... same question applies to new_job, btw.

    return (True, '%s successfully migrated.' % job_id)
Example #31
def _load_sessions(configuration, proto, do_lock=True):
    """Load sessions dict"""
    logger = configuration.logger
    sessions_filepath = os.path.join(configuration.mig_system_run,
                                     "%s.%s"
                                     % (proto, _sessions_filename))
    sessions_lock = None
    if do_lock:
        sessions_lock = _acquire_sessions_lock(
            configuration, proto, exclusive=False)
    result = unpickle(sessions_filepath, logger)
    if do_lock:
        _release_sessions_lock(sessions_lock)

    if not isinstance(result, dict):
        logger.warning("failed to retrieve active %s sessions from %s" %
                       (proto, sessions_filepath))
        result = {}

    return result
Example #32
def create_job_object_from_pickled_mrsl(filepath, logger,
        external_dict):
    """Helper for submit from pickled mRSL"""
    job_dict = unpickle(filepath, logger)
    if not job_dict:
        return (False, 'could not unpickle mrsl file %s' % filepath)
    jobo = Job()
    for (key, value) in job_dict.iteritems():
        if str(type(value)) == "<type 'time.struct_time'>":

            # time.struct_time objects cannot be marshalled in the xmlrpc
            # version we use

            value = str(value)
        if external_dict.has_key(key):

            # ok, this info can be shown to the user (avoid leaking info that
            # breaks anonymity)

            setattr(jobo, key, value)
    return (True, jobo)
Example #33
def create_job_object_from_pickled_mrsl(filepath, logger,
        external_dict):
    """Helper for submit from pickled mRSL"""
    job_dict = unpickle(filepath, logger)
    if not job_dict:
        return (False, 'could not unpickle mrsl file %s' % filepath)
    jobo = Job()
    for (key, value) in job_dict.iteritems():
        if str(type(value)) == "<type 'time.struct_time'>":

            # time.struct_time objects cannot be marshalled in the xmlrpc
            # version we use

            value = str(value)
        if external_dict.has_key(key):

            # ok, this info can be shown to the user (avoid leaking info that
            # breaks anonymity)

            setattr(jobo, key, value)
    return (True, jobo)
Example #34
def load_section_helper(client_id,
                        configuration,
                        section_filename,
                        section_keys,
                        include_meta=False,
                        allow_missing=False):
    """Load settings section from pickled file. Optional include_meta
    controls the inclusion of meta data like creator and creation time.
    Optional allow_missing is used to avoid log errors for sections that may
    or may not already exist.
    """

    client_dir = client_id_dir(client_id)
    section_path = os.path.join(configuration.user_settings, client_dir,
                                section_filename)
    section_dict = unpickle(section_path, configuration.logger, allow_missing)
    if section_dict and not include_meta:
        real_keys = section_keys
        for key in section_dict.keys():
            if not key in real_keys:
                del section_dict[key]
    return section_dict
Example #35
    def __check_dict(self, stattype_key, stattype_value):
        """Checking if dict exists on disk and loads it into memory"""

        # If stattype doesn't exist create it

        if not self.__gridstat_dict.has_key(stattype_key):
            self.__gridstat_dict[stattype_key] = {}

        # If stattype not in memory, check if pickled file exists

        if not self.__gridstat_dict[stattype_key].has_key(stattype_value):
            filename = self.__configuration.gridstat_files_dir\
                 + stattype_key + os.sep + stattype_value.upper()\
                 + '.pck'
            if os.path.exists(filename):
                stat_dict = unpickle(filename, self.__logger)
            else:
                stat_dict = None
            if stat_dict:
                self.__gridstat_dict[stattype_key][stattype_value] = \
                    stat_dict
            else:
                self.__gridstat_dict[stattype_key][stattype_value] = {}
Example #36
    def __check_dict(self, stattype_key, stattype_value):
        """Checking if dict exists on disk and loads it into memory"""

        # If stattype doesn't exist create it

        if not self.__gridstat_dict.has_key(stattype_key):
            self.__gridstat_dict[stattype_key] = {}

        # If stattype not in memory, check if pickled file exists

        if not self.__gridstat_dict[stattype_key].has_key(stattype_value):
            filename = self.__configuration.gridstat_files_dir\
                 + stattype_key + os.sep + stattype_value.upper()\
                 + '.pck'
            if os.path.exists(filename):
                stat_dict = unpickle(filename, self.__logger)
            else:
                stat_dict = None
            if stat_dict:
                self.__gridstat_dict[stattype_key][stattype_value] = \
                    stat_dict
            else:
                self.__gridstat_dict[stattype_key][stattype_value] = {}
Example #37
def add_item_to_pickled_list(path, item, logger):
    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:
        pass
    elif not list_:
        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item already is in the list

    if item in list_:
        output += '%s is already in the list' % item
        return (False, output)

    # ok, lets add the new item and pickle and save the new list

    list_.append(item)
    status = pickle(list_, path, logger)
    if not status:
        output += 'pickle error'
        return (False, output)

    return (True, '')
Example #38
def add_item_to_pickled_list(path, item, logger):
    list_ = unpickle(path, logger)
    output = ''
    if list_ == []:
        pass
    elif not list_:
        output += 'Failure: could not unpickle current list'
        return (False, output)

    # Check if the item already is in the list

    if item in list_:
        output += '%s is already in the list' % item
        return (False, output)

    # ok, lets add the new item and pickle and save the new list

    list_.append(item)
    status = pickle(list_, path, logger)
    if not status:
        output += 'pickle error'
        return (False, output)

    return (True, '')
Example #39
def update_section_helper(client_id, configuration, section_filename, changes,
                          defaults, create_missing=True):
    """Update settings section in pickled file with values from changes
    dictionary. Optional create_missing can be used if the pickle should be
    created if not already there.
    The defaults dictionary is used to set any missing values.
    """

    client_dir = client_id_dir(client_id)
    section_path = os.path.join(configuration.user_settings, client_dir,
                                 section_filename)
    if not os.path.exists(section_path):
        if create_missing:
            section_dict = {}
        else:
            raise Exception('no %s file to update!' % section_filename)
    else:
        section_dict = unpickle(section_path, configuration.logger)
    for (key, val) in defaults.items():
        section_dict[key] = section_dict.get(key, val)
    section_dict.update(changes)
    if not pickle(section_dict, section_path, configuration.logger):
        raise Exception('could not save updated %s file!' % section_filename)
    return section_dict
Example #40
        return (False, '''Fatal error: Could not get exclusive access or write
to %s''' % configuration.grid_stdin)

    if forceddestination and forceddestination.has_key('RE_NAME'):

        # add job_id to runtime environment verification history

        unique_resource_name = forceddestination['UNIQUE_RESOURCE_NAME']
        re_name = forceddestination['RE_NAME']

        resource_config_filename = configuration.resource_home\
             + unique_resource_name + '/config'

        # open resource config

        resource_config = unpickle(resource_config_filename, logger)
        if not resource_config:
            logger.error('error unpickling resource config')
            return (False, 'error unpickling resource config')

        dict_entry = (job_id, client_id)

        # add entry to runtime verification history

        if not resource_config.has_key('RUNTVERIFICATION'):
            resource_config['RUNTVERIFICATION'] = \
                {re_name: [dict_entry]}
        else:
            before_runt_dict = resource_config['RUNTVERIFICATION']
            if not before_runt_dict.has_key(re_name):
                # list.append returns None, so build the new list directly
                before_runt_dict[re_name] = [dict_entry]
Example #41
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    job_ids = accepted['job_id']
    action = accepted['action'][-1]
    src = accepted['src']
    dst = accepted['dst'][-1]

    title_entry = find_entry(output_objects, 'title')
    title_entry['text'] = '%s live I/O' % configuration.short_title
    output_objects.append({'object_type': 'header', 'text'
                           : 'Request live communication with jobs'})

    if not action in valid_actions:
        output_objects.append({'object_type': 'error_text', 'text'
                               : 'Invalid action "%s" (supported: %s)' % \
                               (action, ', '.join(valid_actions))})
        return (output_objects, returnvalues.CLIENT_ERROR)

    if action in post_actions and not correct_handler('POST'):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : 'Only accepting POST requests to prevent unintended updates'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not job_ids or action in interactive_actions:
        job_id = ''
        if job_ids:
            job_id = job_ids[-1]
        output_objects.append({'object_type': 'text', 'text'
                          : '''
Fill in the live I/O details below to request communication with a running
job.
Job ID can be a full ID or a wild card pattern using "*" and "?" to match one
or more of your job IDs.
Use send output without source and destination paths to request upload of the
default stdio files from the job on the resource to the associated job_output
directory in your MiG home.
Destination is always handled as a directory path to put source files into.
Source and destination paths are always taken relative to the job execution
directory on the resource and your MiG home respectively.
'''})
        html = '''
<table class="liveio">
<tr>
<td>
<form method="post" action="liveio.py">
<table class="liveio">
<tr><td class=centertext>
</td></tr>
<tr><td>
Action:<br />
<input type=radio name=action checked value="send" />send output
<input type=radio name=action value="get" />get input
</td></tr>
<tr><td>
Job ID:<br />
<input type=text size=60 name=job_id value="%s" />
</td></tr>
<tr><td>
Source path(s):<br />
<div id="srcfields">
<input type=text size=60 name=src value="" /><br />
</div>
</td></tr>
<tr><td>
Destination path:<br />
<input type=text size=60 name=dst value="" />
</td></tr>
<tr><td>
<input type="submit" value="Send request" />
</td></tr>
</table>
</form>
</td>
<td>
<script type="text/javascript">
fields = 1;
max_fields = 64;
function addInput() {
    if (fields < max_fields) {
        document.getElementById("srcfields").innerHTML += "<input type=text size=60 name=src value='' /><br />";
        fields += 1;
    } else {
        alert("Maximum " + max_fields + " source fields allowed!");
        document.form.add.disabled=true;
    }
}
</script>
<form name="addsrcform">
<input type="button" onclick="addInput(); return false;" name="add" value="Add another source field" />
</form>
</td>
</tr>
</table>
''' % job_id
        output_objects.append({'object_type': 'html_form', 'text'
                              : html})
        output_objects.append({'object_type': 'text', 'text': '''
Further live job control is available through your personal message queues.
They provide a basic interface for centrally storing messages under your grid
account and can be used to pass messages between jobs or for orchestrating
jobs before and during execution.
'''
                               })
        output_objects.append({'object_type': 'link', 'destination':
                               'mqueue.py',
                               'text': 'Message queue interface'})
        return (output_objects, returnvalues.OK)
    elif action in ['get', 'receive', 'input']:
        action = 'get'
        action_desc = 'will be downloaded to the job on the resource'
    elif action in ['put', 'send', 'output']:
        action = 'send'
        action_desc = 'will be uploaded from the job on the resource'
    else:
        output_objects.append({'object_type': 'error_text', 'text'
                              : 'Invalid live io action: %s' % action})
        return (output_objects, returnvalues.CLIENT_ERROR)

    output_objects.append({'object_type': 'text', 'text'
                          : 'Requesting live I/O for %s'
                           % ', '.join(job_ids)})

    if action == 'get' and (not src or not dst):
        output_objects.append(
            {'object_type': 'error_text',
             'text': 'src and dst parameters required for live input'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    # Automatic fall back to stdio files if output with no path provided
                
    if src:
        src_text = 'The files ' + ' '.join(src)
    else:
        src_text = 'The job stdio files'

    if dst:
        dst_text = 'the ' + dst + ' directory'
    else:
        dst_text = 'the corresponding job_output directory'

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    filelist = []
    for job_id in job_ids:
        job_id = job_id.strip()

        # is job currently being executed?

        # Backward compatibility - all_jobs keyword should match all jobs

        if job_id == all_jobs:
            job_id = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + job_id + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning("%s tried to %s restricted path %s ! (%s)" % \
                                (client_id, op_name, real_path, job_id))

                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match....

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % job_id})
        else:
            filelist += match

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')
        job_dict = unpickle(filepath, logger)
        if not job_dict:
            status = returnvalues.CLIENT_ERROR

            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : ('You can only list status of your own jobs. '
                    'Please verify that you submitted the mRSL file '
                    'with job id "%s" (Could not unpickle mRSL file %s)'
                    ) % (job_id, filepath)})
            continue

        if job_dict['STATUS'] != 'EXECUTING':
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Job %s is not currently being executed! Job status: %s'
                 % (job_id, job_dict['STATUS'])})
            continue

        if job_dict['UNIQUE_RESOURCE_NAME'] == 'ARC':
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Job %s is submitted to ARC, details are not available!'
                 % job_id })
            continue

        last_live_update_dict = {}
        last_live_update_file = configuration.mig_system_files + os.sep\
             + job_id + '.last_live_update'
        if os.path.isfile(last_live_update_file):
            last_live_update_dict_unpickled = \
                unpickle(last_live_update_file, logger)
            if not last_live_update_dict_unpickled:
                output_objects.append({'object_type': 'error_text',
                        'text'
                        : 'Could not unpickle %s - skipping request!'
                         % last_live_update_file})
                continue

            if not last_live_update_dict_unpickled.has_key(
                'LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'):
                output_objects.append(
                    {'object_type': 'error_text',
                     'text': 'Could not find needed key in %s.'
                     % last_live_update_file})
                continue

            last_live_update_request = \
                last_live_update_dict_unpickled['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'
                    ]

            difference = datetime.datetime.now() - last_live_update_request
            try:
                min_delay = \
                    int(configuration.min_seconds_between_live_update_requests)
            except:
                min_delay = 30

            if difference.seconds < min_delay:
                output_objects.append(
                    {'object_type': 'error_text',
                     'text': ('Request not allowed, you must wait at least ' \
                              '%s seconds between live update requests!'
                              ) % min_delay})
                continue

        # save this request to file to avoid DoS from a client request loop.

        last_live_update_dict['LAST_LIVE_UPDATE_REQUEST_TIMESTAMP'] = \
            datetime.datetime.now()
        pickle_ret = pickle(last_live_update_dict,
                            last_live_update_file, logger)
        if not pickle_ret:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error saving live io request timestamp to last_live_update '
                 'file, request not sent!'})
            continue

        # #
        # ## job is being executed right now, send live io request to frontend
        # #

        # get resource_config, needed by scp_file_to_resource
        #(status, resource_config) = get_resource_configuration(
        #    resource_home, unique_resource_name, logger)

        resource_config = job_dict['RESOURCE_CONFIG']
        (status, exe) = get_resource_exe(resource_config, job_dict['EXE'],
                                         logger)
        if not status:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Could not get exe configuration for job %s' % job_id})
            continue

        local_file = '%s.%supdate' % (job_dict['LOCALJOBNAME'], action)
        if not os.path.exists(local_file):

            # create

            try:
                filehandle = open(local_file, 'w')
                filehandle.write('job_id '
                                  + job_dict['JOB_ID'] + '\n')
                filehandle.write('localjobname '
                                  + job_dict['LOCALJOBNAME'] + '\n')
                filehandle.write('execution_user '
                                  + exe['execution_user'] + '\n')
                filehandle.write('execution_node '
                                  + exe['execution_node'] + '\n')
                filehandle.write('execution_dir ' + exe['execution_dir']
                                  + '\n')
                filehandle.write('target liveio\n')

                # Leave defaults src and dst to FE script if not provided
                
                if src:
                    filehandle.write('source ' + ' '.join(src) + '\n')
                if dst:
                    filehandle.write('destination ' + dst + '\n')

                # Backward compatible test for shared_fs - fall back to scp

                if exe.has_key('shared_fs') and exe['shared_fs']:
                    filehandle.write('copy_command cp\n')
                    filehandle.write('copy_frontend_prefix \n')
                    filehandle.write('copy_execution_prefix \n')
                else:
                    filehandle.write('copy_command scp -B\n')
                    filehandle.write('copy_frontend_prefix ${frontend_user}@${frontend_node}:\n'
                            )
                    filehandle.write('copy_execution_prefix ${execution_user}@${execution_node}:\n'
                            )

                filehandle.write('### END OF SCRIPT ###\n')
                filehandle.close()
            except Exception, exc:
                pass

        if not os.path.exists(local_file):
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '.%supdate file not available on %s server' % \
                 (action, configuration.short_title)})
            continue

        scpstatus = copy_file_to_resource(local_file, '%s.%supdate'
                 % (job_dict['LOCALJOBNAME'], action), resource_config, logger)
        if not scpstatus:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error sending request for live io to resource!'})
            continue
        else:
            output_objects.append(
                {'object_type': 'text', 'text'
                 : 'Request for live io was successfully sent to the resource!'
                 })
            output_objects.append(
                {'object_type': 'text', 'text'
                 : '%s %s and should become available in %s in a minute.' % \
                 (src_text, action_desc, dst_text)
                 })
            if action == 'send':
                if not dst:
                    target_path = '%s/%s/*' % (job_output_dir, job_id)
                else:
                    target_path = dst
                output_objects.append({'object_type': 'link', 'destination'
                                       : 'ls.py?path=%s' % target_path,
                                       'text': 'View uploaded files'})
            else:
                output_objects.append({'object_type': 'link', 'destination'
                                       : 'ls.py?path=%s' % ';path='.join(src),
                                       'text': 'View files for download'})

        try:
            os.remove(local_file)
        except Exception, exc:
            pass
Example #42
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    if not correct_handler('POST'):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : 'Only accepting POST requests to prevent unintended updates'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']

    if not patterns:
        output_objects.append({'object_type': 'error_text', 'text'
                              : 'No job_id specified!'})
        return (output_objects, returnvalues.NO_SUCH_JOB_ID)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    filelist = []
    keywords_dict = mrslkeywords.get_keywords_dict(configuration)
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, real_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % pattern})
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # resubmit is hard on the server

    if len(filelist) > 100:
        output_objects.append({'object_type': 'error_text', 'text'
                               : 'Too many matching jobs (%s)!'
                               % len(filelist)})
        return (output_objects, returnvalues.CLIENT_ERROR)

    resubmitobjs = []
    status = returnvalues.OK
    for filepath in filelist:
        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')

        # ("Resubmitting job with job_id: %s" % job_id)

        resubmitobj = {'object_type': 'resubmitobj', 'job_id': job_id}

        mrsl_dict = unpickle(filepath, logger)
        if not mrsl_dict:
            resubmitobj['message'] = "No such job: %s (%s)" % (job_id,
                                                               mrsl_file)
            status = returnvalues.CLIENT_ERROR
            resubmitobjs.append(resubmitobj)
            continue

        resubmit_items = keywords_dict.keys()

        # loop selected keywords and create mRSL string

        resubmit_job_string = ''

        for dict_elem in resubmit_items:
            value = ''
            # Extract job value with fallback to default to support optional
            # fields
            job_value = mrsl_dict.get(dict_elem,
                                      keywords_dict[dict_elem]['Value'])
            if keywords_dict[dict_elem]['Type'].startswith('multiplekeyvalues'):
                for (elem_key, elem_val) in job_value:
                    if elem_key:
                        value += '%s=%s\n' % (str(elem_key).strip(),
                                              str(elem_val).strip())
            elif keywords_dict[dict_elem]['Type'].startswith('multiple'):
                for elem in job_value:
                    if elem:
                        value += '%s\n' % str(elem).rstrip()
            else:
                if str(job_value):
                    value += '%s\n' % str(job_value).rstrip()

            # Only insert keywords with an associated value

            if value:
                if value.rstrip() != '':
                    resubmit_job_string += '''::%s::
%s

''' % (dict_elem, value.rstrip())

        # save tempfile

        (filehandle, tempfilename) = \
            tempfile.mkstemp(dir=configuration.mig_system_files,
                             text=True)
        os.write(filehandle, resubmit_job_string)
        os.close(filehandle)

        # submit job the usual way

        (new_job_status, msg, new_job_id) = new_job(tempfilename,
                client_id, configuration, False, True)
        if not new_job_status:
            resubmitobj['status'] = False
            resubmitobj['message'] = msg
            status = returnvalues.SYSTEM_ERROR
            resubmitobjs.append(resubmitobj)
            continue

            # o.out("Resubmit failed: %s" % msg)
            # o.reply_and_exit(o.ERROR)

        resubmitobj['status'] = True
        resubmitobj['new_job_id'] = new_job_id
        resubmitobjs.append(resubmitobj)

        # o.out("Resubmit successful: %s" % msg)
        # o.out("%s" % msg)

    output_objects.append({'object_type': 'resubmitobjs', 'resubmitobjs'
                          : resubmitobjs})

    return (output_objects, status)
Example #43
        # Traverse mRSL dir and update cache

        for (root, _, files) in os.walk(root_dir, topdown=True):

            # skip all dot dirs - they are from repos etc and _not_ jobs

            if root.find(os.sep + '.') != -1:
                continue
            for name in files:
                filename = os.path.join(root, name)

                # Only files modified since last update is checked

                if os.path.getmtime(filename) > last_buildtime:
                    job_dict = unpickle(filename, self.__logger)
                    if not job_dict:
                        msg = 'gridstat::update() could not load: %s '\
                             % filename
                        self.__logger.error(msg)
                        continue

                    job_vgrids = validated_vgrid_list(self.__configuration,
                                                      job_dict)

                    for job_vgrid_name in job_vgrids:

                        # Update the statistics and cache
                        # from the job details

                        job_vgrid_name = job_vgrid_name.upper()
Exemple #44
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    if not correct_handler('POST'):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : 'Only accepting POST requests to prevent unintended updates'})
        return (output_objects, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, real_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % pattern})
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job schedule is hard on the server, limit

    if len(filelist) > 100:
        output_objects.append({'object_type': 'error_text', 'text'
                               : 'Too many matching jobs (%s)!'
                               % len(filelist)})
        return (output_objects, returnvalues.CLIENT_ERROR)

    saveschedulejobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')

        saveschedulejob = {'object_type': 'saveschedulejob',
                           'job_id': job_id}

        dict = unpickle(filepath, logger)
        if not dict:
            saveschedulejob['message'] = \
                                       ('The file containing the information' \
                                        ' for job id %s could not be opened!' \
                                        ' You can only read schedule for ' \
                                        'your own jobs!') % job_id
            saveschedulejobs.append(saveschedulejob)
            status = returnvalues.CLIENT_ERROR
            continue

        saveschedulejob['oldstatus'] = dict['STATUS']

        # Is the job status pending?

        possible_schedule_states = ['QUEUED', 'RETRY', 'FROZEN']
        if not dict['STATUS'] in possible_schedule_states:
            saveschedulejob['message'] = \
                'You can only read schedule for jobs with status: %s.'\
                 % ' or '.join(possible_schedule_states)
            saveschedulejobs.append(saveschedulejob)
            continue

        # notify queue

        if not send_message_to_grid_script('JOBSCHEDULE ' + job_id
                 + '\n', logger, configuration):
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'Error sending message to grid_script, update may fail.'
                 })
            status = returnvalues.SYSTEM_ERROR
            continue

        saveschedulejobs.append(saveschedulejob)

    savescheduleinfo = """Please find any available job schedule status in
verbose job status output."""
    output_objects.append({'object_type': 'saveschedulejobs',
                          'saveschedulejobs': saveschedulejobs,
                           'savescheduleinfo': savescheduleinfo})
    return (output_objects, status)
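
The pattern-to-filelist expansion above recurs in several of these handlers. A simplified sketch of the idea, using a plain prefix check in place of the real valid_user_path helper, might be:

import glob
import os

def own_job_files(base_dir, pattern):
    """Expand a job id pattern to mRSL paths strictly inside base_dir.

    Simplified stand-in for the glob plus valid_user_path check above;
    base_dir must end in os.sep so a prefix match on another user's
    name cannot escape the directory."""
    matches = []
    for server_path in glob.glob(base_dir + pattern + '.mRSL'):
        real_path = os.path.abspath(server_path)
        if not real_path.startswith(base_dir):
            continue
        matches.append(real_path)
    return matches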
Exemple #45
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    client_dir = client_id_dir(client_id)
    status = returnvalues.OK
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        logger.error("jobstatus input validation failed: %s" % accepted)
        return (accepted, returnvalues.CLIENT_ERROR)

    flags = ''.join(accepted['flags'])
    max_jobs = int(accepted['max_jobs'][-1])
    order = 'unsorted '
    if sorted(flags):
        order = 'sorted '
    patterns = accepted['job_id']
    project_names = accepted['project_name']

    if len(project_names) > 0:
        for project_name in project_names:
            project_name_job_ids = \
                get_job_ids_with_specified_project_name(client_id,
                    project_name, configuration.mrsl_files_dir, logger)
            patterns.extend(project_name_job_ids)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    output_objects.append({'object_type': 'header', 'text'
                          : '%s %s job status' % \
                            (configuration.short_title, order)})

    if not patterns:
        output_objects.append({'object_type': 'error_text', 'text'
                               : 'No job_id specified!'})
        return (output_objects, returnvalues.NO_SUCH_JOB_ID)

    if verbose(flags):
        for flag in flags:
            output_objects.append({'object_type': 'text', 'text'
                                  : '%s using flag: %s' % (op_name,
                                  flag)})

    if not os.path.isdir(base_dir):
        output_objects.append(
            {'object_type': 'error_text', 'text'
             : ('You have not been created as a user on the %s server! ' \
                'Please contact the %s team.') % \
             (configuration.short_title, configuration.short_title)})
        return (output_objects, returnvalues.CLIENT_ERROR)

    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to
        # avoid leaking information about file system layout while
        # allowing consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            real_path = os.path.abspath(server_path)
            if not valid_user_path(real_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)'
                               % (client_id, op_name, real_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(real_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match....

        if not match:
            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : '%s: You do not have any matching job IDs!' % pattern})
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    if sorted(flags):
        sort(filelist)

    if max_jobs < len(filelist):
        output_objects.append(
            {'object_type': 'text', 'text'
             : 'Only showing first %d of the %d matching jobs as requested'
             % (max_jobs, len(filelist))})
        filelist = filelist[:max_jobs]

    # Iterate through jobs and print details for each

    job_list = {'object_type': 'job_list', 'jobs': []}

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')
        job_dict = unpickle(filepath, logger)
        if not job_dict:
            status = returnvalues.CLIENT_ERROR

            output_objects.append(
                {'object_type': 'error_text', 'text'
                 : 'No such job: %s (could not load mRSL file %s)' % \
                 (job_id, filepath)})
            continue

        # Expand any job variables before use
        job_dict = expand_variables(job_dict)

        job_obj = {'object_type': 'job', 'job_id': job_id}
        job_obj['status'] = job_dict['STATUS']

        time_fields = [
            'VERIFIED',
            'VERIFIED_TIMESTAMP',
            'RECEIVED_TIMESTAMP',
            'QUEUED_TIMESTAMP',
            'SCHEDULE_TIMESTAMP',
            'EXECUTING_TIMESTAMP',
            'FINISHED_TIMESTAMP',
            'FAILED_TIMESTAMP',
            'CANCELED_TIMESTAMP',
            ]
        for name in time_fields:
            if job_dict.has_key(name):

                # time objects cannot be marshalled - use the asctime string
                # if the value is a timestamp

                try:
                    job_obj[name.lower()] = time.asctime(job_dict[name])
                except Exception, exc:

                    # not a time object, just add

                    job_obj[name.lower()] = job_dict[name]

        ###########################################
        # ARC job status retrieval on demand:
        # But we should _not_ update the status in the mRSL files, since 
        # other MiG code might rely on finding only valid "MiG" states.
        
        if configuration.arc_clusters and \
               job_dict.get('UNIQUE_RESOURCE_NAME', 'unset') == 'ARC' \
               and job_dict['STATUS'] == 'EXECUTING':
            try:
                home = os.path.join(configuration.user_home, client_dir)
                arcsession = arc.Ui(home)
                arcstatus = arcsession.jobStatus(job_dict['EXE'])
                job_obj['status'] = arcstatus['status']
            except arc.ARCWrapperError, err:
                logger.error('Error retrieving ARC job status: %s' % \
                             err.what())
                job_obj['status'] += '(Error: ' + err.what() + ')' 
            except arc.NoProxyError, err:
                logger.error('While retrieving ARC job status: %s' % \
                             err.what())
                job_obj['status'] += '(Error: ' + err.what() + ')' 
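
The time_fields loop above falls back to the raw value whenever time.asctime() rejects it. A tiny sketch of that fallback as a hypothetical standalone helper:

import time

def marshal_timestamp(value):
    """Return an asctime string for time tuples and the raw value otherwise,
    mirroring the try/except fallback in the time_fields loop above."""
    try:
        return time.asctime(value)
    except (TypeError, ValueError):
        return value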
Exemple #46
0
if __name__ == '__main__':
    print 'starting translation test. Args: ', len(sys.argv)
    if len(sys.argv) > 1:
        fname = sys.argv[1]
        # only log the file name once we know an argument was actually given
        logger.debug('translation for file ' + fname + ' starts')
        parsed = '.'.join([fname, 'parsed'])
        translated = '.'.join([parsed, 'xrsl'])

        try:
            import shared.mrslparser as p
            import shared.fileio as fileio

            (presult,errors) = p.parse(fname, 'test-id',
                                       '+No+Client+Id',None,parsed)
            if not presult:
                print 'Errors:\n%s' % errors
            else:
                print 'Parsing OK, now translating'
                mrsl_dict = fileio.unpickle(parsed,logger)
                (xrsl,script,name) = translate(mrsl_dict,'test-name')
                print '\n'.join(['Job name',name,'script',script,'XRSL'])
                fileio.write_file(script, "test-id.sh", logger)
                print (format_xrsl(xrsl))
                fileio.write_file("%s" % xrsl, translated, logger)
                print 'done'
        except Exception, err:
            print 'Error.'
            print err.__str__()

Exemple #47
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    defaults = signature()[1]
    title_entry = find_entry(output_objects, 'title')
    label = "%s" % configuration.site_vgrid_label
    title_entry['text'] = '%s Workflows' % label
    # NOTE: Delay header entry here to include vgrid_name
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    vgrid_name = accepted['vgrid_name'][-1]
    operation = accepted['operation'][-1]
    flags = ''.join(accepted['flags'][-1])

    if not vgrid_is_owner_or_member(vgrid_name, client_id, configuration):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''You must be an owner or member of %s vgrid to
access the workflows.''' % vgrid_name
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not operation in allowed_operations:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''Operation must be one of %s.''' % ', '.join(allowed_operations)
        })
        return (output_objects, returnvalues.OK)

    if operation in show_operations:

        # jquery support for tablesorter (and unused confirmation dialog)
        # table initially sorted by 0 (last update / date)

        refresh_call = 'ajax_workflowjobs("%s", "%s")' % (vgrid_name, flags)
        table_spec = {
            'table_id': 'workflowstable',
            'sort_order': '[[0,1]]',
            'refresh_call': refresh_call
        }
        (add_import, add_init,
         add_ready) = man_base_js(configuration, [table_spec])
        if operation == "show":
            add_ready += '%s;' % refresh_call
        add_ready += '''
              /* Init variables helper as foldable but closed and with individual
              heights */
              $(".variables-accordion").accordion({
                                           collapsible: true,
                                           active: false,
                                           heightStyle: "content"
                                          });
              /* fix and reduce accordion spacing */
              $(".ui-accordion-header").css("padding-top", 0)
                                       .css("padding-bottom", 0).css("margin", 0);
              /* NOTE: requires managers CSS fix for proper tab bar height */
              $(".workflow-tabs").tabs();
              $("#logarea").scrollTop($("#logarea")[0].scrollHeight);
        '''
        title_entry['script']['advanced'] += add_import
        title_entry['script']['init'] += add_init
        title_entry['script']['ready'] += add_ready
        output_objects.append({
            'object_type': 'html_form',
            'text': man_base_html(configuration)
        })

        output_objects.append({
            'object_type':
            'header',
            'text':
            '%s Workflows for %s' % (label, vgrid_name)
        })

    logger.info('vgridworkflows %s %s' % (vgrid_name, operation))

    # Iterate through jobs and list details for each

    trigger_jobs = []
    log_content = ''

    if operation in list_operations:
        trigger_job_dir = os.path.join(
            configuration.vgrid_home,
            os.path.join(vgrid_name,
                         '.%s.jobs' % configuration.vgrid_triggers))
        trigger_job_pending_dir = os.path.join(trigger_job_dir,
                                               'pending_states')
        trigger_job_final_dir = os.path.join(trigger_job_dir, 'final_states')

        if makedirs_rec(trigger_job_pending_dir, configuration) \
                and makedirs_rec(trigger_job_final_dir, configuration):
            abs_vgrid_dir = '%s/' \
                % os.path.abspath(os.path.join(configuration.vgrid_files_home,
                                               vgrid_name))
            for filename in os.listdir(trigger_job_pending_dir):
                trigger_job_filepath = \
                    os.path.join(trigger_job_pending_dir, filename)
                trigger_job = unpickle(trigger_job_filepath, logger)
                if not trigger_job:
                    logger.warning('skipping unreadable trigger job file %s'
                                   % trigger_job_filepath)
                    continue
                serverjob_filepath = \
                    os.path.join(configuration.mrsl_files_dir,
                                 os.path.join(
                                     client_id_dir(trigger_job['owner']),
                                     '%s.mRSL' % trigger_job['jobid']))
                serverjob = unpickle(serverjob_filepath, logger)
                if serverjob:
                    if serverjob['STATUS'] in pending_states:
                        trigger_event = trigger_job['event']
                        trigger_rule = trigger_job['rule']
                        trigger_action = trigger_event['event_type']
                        trigger_time = time.ctime(trigger_event['time_stamp'])
                        trigger_path = '%s %s' % \
                                       (trigger_event['src_path'].replace(
                                        abs_vgrid_dir, ''),
                                           trigger_event['dest_path'].replace(
                                        abs_vgrid_dir, ''))
                        job = {
                            'object_type': 'trigger_job',
                            'job_id': trigger_job['jobid'],
                            'rule_id': trigger_rule['rule_id'],
                            'path': trigger_path,
                            'action': trigger_action,
                            'time': trigger_time,
                            'status': serverjob['STATUS']
                        }
                        if not job['rule_id'].startswith(img_trigger_prefix) \
                                or verbose(flags):
                            trigger_jobs.append(job)
                    elif serverjob['STATUS'] in final_states:
                        src_path = os.path.join(trigger_job_pending_dir,
                                                filename)
                        dest_path = os.path.join(trigger_job_final_dir,
                                                 filename)
                        move_file(src_path, dest_path, configuration)
                    else:
                        logger.error(
                            'Trigger job: %s, unknown state: %s' %
                            (trigger_job['jobid'], serverjob['STATUS']))

        log_content = read_trigger_log(configuration, vgrid_name, flags)

    if operation in show_operations:

        # Always run as rule creator to avoid users being able to act on behalf
        # of ANY other user using triggers (=exploit)

        extra_fields = [
            ('path', None),
            ('match_dirs', ['False', 'True']),
            ('match_recursive', ['False', 'True']),
            ('changes', [keyword_all] + valid_trigger_changes),
            ('action', [keyword_auto] + valid_trigger_actions),
            ('arguments', None),
            ('run_as', client_id),
        ]

        # NOTE: we do NOT show saved template contents - see addvgridtriggers

        optional_fields = [('rate_limit', None), ('settle_time', None)]

        # Only include system triggers in verbose mode
        if verbose(flags):
            system_filter = []
        else:
            system_filter = [('rule_id', '%s_.*' % img_trigger_prefix)]
        (init_status,
         oobjs) = vgrid_add_remove_table(client_id,
                                         vgrid_name,
                                         'trigger',
                                         'vgridtrigger',
                                         configuration,
                                         extra_fields + optional_fields,
                                         filter_items=system_filter)
        if not init_status:
            output_objects.append({
                'object_type': 'error_text',
                'text': 'failed to load triggers: %s' % oobjs
            })
            return (output_objects, returnvalues.SYSTEM_ERROR)

        # Generate variable helper values for a few concrete samples for help
        # text
        vars_html = ''
        dummy_rule = {'run_as': client_id, 'vgrid_name': vgrid_name}
        samples = [('input.txt', 'modified'), ('input/image42.raw', 'changed')]
        for (path, change) in samples:
            vgrid_path = os.path.join(vgrid_name, path)
            vars_html += "<b>Expanded variables when %s is %s:</b><br/>" % \
                (vgrid_path, change)
            expanded = get_path_expand_map(vgrid_path, dummy_rule, change)
            for (key, val) in expanded.items():
                vars_html += "    %s: %s<br/>" % (key, val)
        commands_html = ''
        commands = get_usage_map(configuration)
        for usage in commands.values():
            commands_html += "    %s<br/>" % usage

        helper_html = """
<div class='variables-accordion'>
<h4>Help on available trigger variable names and values</h4>
<p>
Triggers can use a number of helper variables of the form +TRIGGERXYZ+ to
dynamically act on targets. Some of the values are bound to the rule owner and
the %s while the remaining ones are automatically expanded for the particular
trigger target as shown in the following examples:<br/>
%s
</p>
<h4>Help on available trigger commands and arguments</h4>
<p>
It is possible to set up trigger rules that basically run any operation with a
side effect you could manually do on %s. I.e. like submitting/cancelling
a job, creating/moving/deleting a file or directory and so on. When you select
'command' as the action for a trigger rule, you have the following commands at
your disposal:<br/>
%s
</p>
</div>
""" % (label, vars_html, configuration.short_title, commands_html)

        # Make page with manage triggers tab and active jobs and log tab

        output_objects.append({
            'object_type':
            'html_form',
            'text':
            '''
    <div id="wrap-tabs" class="workflow-tabs">
<ul>
<li><a href="#manage-tab">Manage Triggers</a></li>
<li><a href="#jobs-tab">Active Trigger Jobs</a></li>
</ul>
'''
        })

        # Display existing triggers and form to add new ones

        output_objects.append({
            'object_type': 'html_form',
            'text': '''
<div id="manage-tab">
'''
        })

        output_objects.append({
            'object_type': 'sectionheader',
            'text': 'Manage Triggers'
        })
        output_objects.extend(oobjs)
        output_objects.append({
            'object_type': 'html_form',
            'text': helper_html
        })

        if configuration.site_enable_crontab:
            output_objects.append({
                'object_type':
                'html_form',
                'text':
                '''
<p>You can combine these workflows with the personal '''
            })
            output_objects.append({
                'object_type': 'link',
                'destination': 'crontab.py',
                'class': 'crontablink iconspace',
                'text': 'schedule task'
            })
            output_objects.append({
                'object_type':
                'html_form',
                'text':
                '''
facilities in case you want to trigger flows at given times rather than only
in reaction to file system events.</p>
'''
            })
        output_objects.append({
            'object_type': 'html_form',
            'text': '''
</div>
'''
        })

        # Display active trigger jobs and recent logs for this vgrid

        output_objects.append({
            'object_type': 'html_form',
            'text': '''
    <div id="jobs-tab">
    '''
        })
        output_objects.append({
            'object_type': 'sectionheader',
            'text': 'Active Trigger Jobs'
        })
        output_objects.append({
            'object_type': 'table_pager',
            'entry_name': 'job',
            'default_entries': default_pager_entries
        })

    output_objects.append({
        'object_type': 'trigger_job_list',
        'trigger_jobs': trigger_jobs
    })

    if operation in show_operations:
        output_objects.append({
            'object_type': 'sectionheader',
            'text': 'Trigger Log'
        })

    output_objects.append({
        'object_type': 'trigger_log',
        'log_content': log_content
    })
    if operation in show_operations:
        output_objects.append({
            'object_type': 'html_form',
            'text': '''
</div>
'''
        })

        output_objects.append({
            'object_type': 'html_form',
            'text': '''
</div>
'''
        })
    return (output_objects, returnvalues.OK)
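
The listing branch above moves trigger job files from pending_states to final_states once the matching server job finishes. A rough sketch of that bookkeeping, with load_job_status and move_file passed in as stand-ins for the unpickle-based lookup and the shared move_file helper, might be:

import os

def archive_finished_trigger_jobs(pending_dir, final_dir, final_states,
                                  load_job_status, move_file):
    """Move pending trigger job files to the final_states dir once their
    server job has reached a final state, as in the listing branch above."""
    for filename in os.listdir(pending_dir):
        status = load_job_status(os.path.join(pending_dir, filename))
        if status in final_states:
            move_file(os.path.join(pending_dir, filename),
                      os.path.join(final_dir, filename))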
Exemple #48
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']

    if not safe_handler(configuration, 'post', op_name, client_id,
                        get_csrf_limit(configuration), accepted):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''Only accepting
CSRF-filtered POST requests to prevent unintended updates'''
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not configuration.site_enable_jobs:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''Job execution is not enabled on this system'''
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.warning('%s tried to %s restricted path %s ! (%s)' %
                               (client_id, op_name, abs_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                '%s: You do not have any matching job IDs!' % pattern
            })
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job schedule is hard on the server, limit

    if len(filelist) > 100:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Too many matching jobs (%s)!' % len(filelist)
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    saveschedulejobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')

        saveschedulejob = {'object_type': 'saveschedulejob', 'job_id': job_id}

        dict = unpickle(filepath, logger)
        if not dict:
            saveschedulejob['message'] = \
                                       ('The file containing the information' \
                                        ' for job id %s could not be opened!' \
                                        ' You can only read schedule for ' \
                                        'your own jobs!') % job_id
            saveschedulejobs.append(saveschedulejob)
            status = returnvalues.CLIENT_ERROR
            continue

        saveschedulejob['oldstatus'] = dict['STATUS']

        # Is the job status pending?

        possible_schedule_states = ['QUEUED', 'RETRY', 'FROZEN']
        if not dict['STATUS'] in possible_schedule_states:
            saveschedulejob['message'] = \
                'You can only read schedule for jobs with status: %s.'\
                 % ' or '.join(possible_schedule_states)
            saveschedulejobs.append(saveschedulejob)
            continue

        # notify queue

        if not send_message_to_grid_script('JOBSCHEDULE ' + job_id + '\n',
                                           logger, configuration):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                'Error sending message to grid_script, update may fail.'
            })
            status = returnvalues.SYSTEM_ERROR
            continue

        saveschedulejobs.append(saveschedulejob)

    savescheduleinfo = """Please find any available job schedule status in
verbose job status output."""
    output_objects.append({
        'object_type': 'saveschedulejobs',
        'saveschedulejobs': saveschedulejobs,
        'savescheduleinfo': savescheduleinfo
    })
    return (output_objects, status)
Exemple #49
0
    o.reply_and_exit(o.ERROR)

# Check that resource address matches request source
# TODO: get real ip and enable this check
# remote_ip = str(os.getenv("REMOTE_ADDR"))
# resource_ip = "0.0.0.0"
# if remote_ip != resource_ip:
#    print "Warning: job request not sent from expected resource address!"
#    logger.warning("job request not issued from address of resource! (%s != %s)", remote_ip, resource_ip)

# TODO: check that the person who submitted the job (where the session ID points) is also the one that submitted the
# received jobid (to avoid a verified user specifies another users job id)

mrslfile = configuration.sessid_to_mrsl_link_home + sessionid + '.mRSL'

mrsldict = unpickle(mrslfile, logger)

if not mrsldict:
    o.out('requestinteractivejob error! Could not open mrsl file')
    o.reply_and_exit(o.ERROR)

job_submitter_client_id = mrsldict['USER_CERT']
o.out('job_submitter_client_id: %s' % job_submitter_client_id)

mrsl_jobid = mrsldict['JOB_ID']
if not jobid == mrsl_jobid:
    o.out('requestinteractivejob error! Wrong job_id specified!')
    o.reply_and_exit(o.ERROR)

# TODO: check the status of the specified job(id) and verify it has not previously been executed.
# The status must be ? (What about RETRY?)
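
The snippet above resolves a session id to its mRSL file and rejects requests whose job id does not match. A hedged sketch of the same lookup and check, with unpickle passed in to stand for the shared fileio helper:

import os

def load_session_job(sessid_to_mrsl_link_home, sessionid, expected_job_id,
                     logger, unpickle):
    """Look up the mRSL dict behind a session id and verify the job id,
    mirroring the checks in the snippet above."""
    mrslfile = os.path.join(sessid_to_mrsl_link_home, sessionid + '.mRSL')
    mrsl_dict = unpickle(mrslfile, logger)
    if not mrsl_dict:
        return None
    if mrsl_dict['JOB_ID'] != expected_job_id:
        return None
    return mrsl_dict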
Exemple #50
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    patterns = accepted['job_id']
    action = accepted['action'][-1]

    if not safe_handler(configuration, 'post', op_name, client_id,
                        get_csrf_limit(configuration), accepted):
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''Only accepting
CSRF-filtered POST requests to prevent unintended updates'''
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    if not configuration.site_enable_jobs:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '''Job execution is not enabled on this system'''
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if not action in valid_actions.keys():
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Invalid job action "%s" (only %s supported)' %
            (action, ', '.join(valid_actions.keys()))
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    new_state = valid_actions[action]

    # Please note that base_dir must end in slash to avoid access to other
    # user dirs when own name is a prefix of another user name

    base_dir = \
        os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                        client_dir)) + os.sep

    status = returnvalues.OK
    filelist = []
    for pattern in patterns:
        pattern = pattern.strip()

        # Backward compatibility - all_jobs keyword should match all jobs

        if pattern == all_jobs:
            pattern = '*'

        # Check directory traversal attempts before actual handling to avoid
        # leaking information about file system layout while allowing
        # consistent error messages

        unfiltered_match = glob.glob(base_dir + pattern + '.mRSL')
        match = []
        for server_path in unfiltered_match:
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(server_path)
            if not valid_user_path(configuration, abs_path, base_dir, True):

                # out of bounds - save user warning for later to allow
                # partial match:
                # ../*/* is technically allowed to match own files.

                logger.error(
                    '%s tried to use %s %s outside own home! (pattern %s)' %
                    (client_id, op_name, abs_path, pattern))
                continue

            # Insert valid job files in filelist for later treatment

            match.append(abs_path)

        # Now actually treat list of allowed matchings and notify if no
        # (allowed) match

        if not match:
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                '%s: You do not have any matching job IDs!' % pattern
            })
            status = returnvalues.CLIENT_ERROR
        else:
            filelist += match

    # job state change is hard on the server, limit

    if len(filelist) > 500:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Too many matching jobs (%s)!' % len(filelist)
        })
        return (output_objects, returnvalues.CLIENT_ERROR)

    changedstatusjobs = []

    for filepath in filelist:

        # Extract job_id from filepath (replace doesn't modify filepath)

        mrsl_file = filepath.replace(base_dir, '')
        job_id = mrsl_file.replace('.mRSL', '')

        changedstatusjob = {
            'object_type': 'changedstatusjob',
            'job_id': job_id
        }

        job_dict = unpickle(filepath, logger)
        if not job_dict:
            changedstatusjob['message'] = '''The file containing the
information for job id %s could not be opened! You can only %s your own
jobs!''' % (job_id, action)
            changedstatusjobs.append(changedstatusjob)
            status = returnvalues.CLIENT_ERROR
            continue

        changedstatusjob['oldstatus'] = job_dict['STATUS']

        # Is the job status compatible with action?

        possible_cancel_states = [
            'PARSE', 'QUEUED', 'RETRY', 'EXECUTING', 'FROZEN'
        ]
        if action == 'cancel' and \
               not job_dict['STATUS'] in possible_cancel_states:
            changedstatusjob['message'] = \
                'You can only cancel jobs with status: %s.'\
                 % ' or '.join(possible_cancel_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue
        possible_freeze_states = ['QUEUED', 'RETRY']
        if action == 'freeze' and \
               not job_dict['STATUS'] in possible_freeze_states:
            changedstatusjob['message'] = \
                'You can only freeze jobs with status: %s.'\
                 % ' or '.join(possible_freeze_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue
        possible_thaw_states = ['FROZEN']
        if action == 'thaw' and \
               not job_dict['STATUS'] in possible_thaw_states:
            changedstatusjob['message'] = \
                'You can only thaw jobs with status: %s.'\
                 % ' or '.join(possible_thaw_states)
            status = returnvalues.CLIENT_ERROR
            changedstatusjobs.append(changedstatusjob)
            continue

        # job action is handled by changing the STATUS field, notifying the
        # job queue and making sure the server never submits jobs with status
        # FROZEN or CANCELED.

        # file is repickled to ensure newest information is used, job_dict
        # might be old if another script has modified the file.

        if not unpickle_and_change_status(filepath, new_state, logger):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                'Job status could not be changed to %s!' % new_state
            })
            status = returnvalues.SYSTEM_ERROR

        # Avoid key error and make sure grid_script gets expected number of
        # arguments

        if not job_dict.has_key('UNIQUE_RESOURCE_NAME'):
            job_dict['UNIQUE_RESOURCE_NAME'] = \
                'UNIQUE_RESOURCE_NAME_NOT_FOUND'
        if not job_dict.has_key('EXE'):
            job_dict['EXE'] = 'EXE_NAME_NOT_FOUND'

        # notify queue

        if not send_message_to_grid_script(
                'JOBACTION ' + job_id + ' ' + job_dict['STATUS'] + ' ' +
                new_state + ' ' + job_dict['UNIQUE_RESOURCE_NAME'] + ' ' +
                job_dict['EXE'] + '\n', logger, configuration):
            output_objects.append({
                'object_type':
                'error_text',
                'text':
                '''Error sending message to grid_script,
job may still be in the job queue.'''
            })
            status = returnvalues.SYSTEM_ERROR
            continue

        changedstatusjob['newstatus'] = new_state
        changedstatusjobs.append(changedstatusjob)

    output_objects.append({
        'object_type': 'changedstatusjobs',
        'changedstatusjobs': changedstatusjobs
    })
    return (output_objects, status)
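
The status checks above only allow cancel, freeze and thaw from specific job states. A compact sketch of the same rules as a lookup table (the helper and table names are hypothetical; the real handler inlines the checks):

# Allowed current states per requested action, as listed in the handler above
VALID_TRANSITIONS = {
    'cancel': ['PARSE', 'QUEUED', 'RETRY', 'EXECUTING', 'FROZEN'],
    'freeze': ['QUEUED', 'RETRY'],
    'thaw': ['FROZEN'],
}

def action_allowed(action, current_status):
    """Return True if the requested job action is valid for current_status."""
    return current_status in VALID_TRANSITIONS.get(action, [])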
Exemple #51
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""

    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
        )
    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    vgrid_name = accepted['vgrid_name'][-1]

    if not vgrid_is_owner_or_member(vgrid_name, client_id,
                                    configuration):
        output_objects.append({'object_type': 'error_text',
                              'text': '''You must be an owner or member of %s vgrid to
access the workflows.'''
                               % vgrid_name})
        return (output_objects, returnvalues.CLIENT_ERROR)

    title_entry = find_entry(output_objects, 'title')
    title_entry['text'] = '%s Workflows' \
        % configuration.site_vgrid_label
    title_entry['style'] = themed_styles(configuration)
    title_entry['javascript'] = \
        '''
<script type="text/javascript" src="/images/js/jquery.js"></script>
<script type="text/javascript" src="/images/js/jquery.tablesorter.js"></script>
<script type="text/javascript" src="/images/js/jquery.tablesorter.pager.js"></script>
<script type="text/javascript" src="/images/js/jquery.tablesorter.widgets.js"></script>
<script type="text/javascript" src="/images/js/jquery-ui.js"></script>

<script type="text/javascript">
$(document).ready(function() {

          $("#logarea").scrollTop($("#logarea")[0].scrollHeight);

          // table initially sorted by 0 (last update / date) 
          var sortOrder = [[0,1]];

          // use image path for sorting if there is any inside
          var imgTitle = function(contents) {
              var key = $(contents).find("a").attr("class");
              if (key == null) {
                  key = $(contents).html();
              }
              return key;
          }

          $("#workflowstable").tablesorter({widgets: ["zebra", "saveSort"],
                                        sortList:sortOrder,
                                        textExtraction: imgTitle
                                        })
                               .tablesorterPager({ container: $("#pager"),
                                        size: %s
                                        });
     }
);
</script>
''' \
        % default_pager_entries

    output_objects.append({'object_type': 'html_form',
                          'text': '''
 <div id="confirm_dialog" title="Confirm" style="background:#fff;">
  <div id="confirm_text"><!-- filled by js --></div>
   <textarea cols="72" rows="10" id="confirm_input" style="display:none;"></textarea>
 </div>
'''})

    output_objects.append({'object_type': 'header',
                          'text': '%s Workflows for %s'
                          % (configuration.site_vgrid_label,
                          vgrid_name)})

    logger.info('vgridworkflows %s' % vgrid_name)

    # Display active trigger jobs for this vgrid

    output_objects.append({'object_type': 'sectionheader',
                          'text': 'Active Trigger Jobs'})
    html = '<table><thead><tr>'
    html += '<th>Job ID</th>'
    html += '<th>Rule</th>'
    html += '<th>Path</th>'
    html += '<th>Change</th>'
    html += '<th>Time</th>'
    html += '<th>Status</th>'
    html += '</tr></thead>'
    html += '<tbody>'

    trigger_job_dir = os.path.join(configuration.vgrid_home,
                                   os.path.join(vgrid_name, '.%s.jobs' %
                                                configuration.vgrid_triggers))
    trigger_job_pending_dir = os.path.join(trigger_job_dir, 'pending_states')
    trigger_job_final_dir = os.path.join(trigger_job_dir, 'final_states')

    if makedirs_rec(trigger_job_pending_dir, logger) \
        and makedirs_rec(trigger_job_final_dir, logger):
        abs_vgrid_dir = '%s/' \
            % os.path.abspath(os.path.join(configuration.vgrid_files_home,
                              vgrid_name))
        for filename in os.listdir(trigger_job_pending_dir):
            trigger_job_filepath = \
                os.path.join(trigger_job_pending_dir, filename)
            trigger_job = unpickle(trigger_job_filepath, logger)
            if not trigger_job:
                logger.warning('skipping unreadable trigger job file %s'
                               % trigger_job_filepath)
                continue
            serverjob_filepath = \
                os.path.join(configuration.mrsl_files_dir,
                             os.path.join(client_id_dir(trigger_job['owner']),
                                          '%s.mRSL' % trigger_job['jobid']))
            serverjob = unpickle(serverjob_filepath, logger)
            if serverjob:
                if serverjob['STATUS'] in pending_states:
                    trigger_event = trigger_job['event']
                    trigger_rule = trigger_job['rule']
                    trigger_action = trigger_event['event_type']
                    trigger_time = time.ctime(trigger_event['time_stamp'])
                    trigger_path = '%s %s' % \
                        (trigger_event['src_path'].replace(abs_vgrid_dir, ''),
                         trigger_event['dest_path'].replace(abs_vgrid_dir, ''))
                    html += \
                        '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' \
                        % (trigger_job['jobid'], trigger_rule['rule_id'],
                           trigger_path, trigger_action, trigger_time,
                           serverjob['STATUS'])
                elif serverjob['STATUS'] in final_states:
                    src_path = os.path.join(trigger_job_pending_dir,
                            filename)
                    dest_path = os.path.join(trigger_job_final_dir,
                            filename)
                    move_file(src_path, dest_path, configuration)
                else:
                    logger.error('Trigger job: %s, unknown state: %s'
                                 % (trigger_job['jobid'],
                                 serverjob['STATUS']))
    html += '</tbody>'
    html += '</table>'
    output_objects.append({'object_type': 'html_form', 'text': html})

    # Display the recent trigger log for this vgrid

    output_objects.append({'object_type': 'sectionheader',
                          'text': 'Trigger Log'})
    log_content = read_trigger_log(configuration, vgrid_name)
    output_objects.append({'object_type': 'html_form',
                          'text': '''
 <div class="form_container">
 <textarea id="logarea" rows=10 readonly="readonly">%s</textarea>
 </div>
 '''
                          % log_content})

    output_objects.append({'object_type': 'sectionheader',
                          'text': 'Manage Triggers'})

    # Always run as rule creator to avoid users being able to act on behalf
    # of ANY other user using triggers (=exploit)

    extra_fields = [
        ('path', None),
        ('match_dirs', ['False', 'True']),
        ('match_recursive', ['False', 'True']),
        ('changes', [keyword_all] + valid_trigger_changes),
        ('action', [keyword_auto] + valid_trigger_actions),
        ('arguments', None),
        ('run_as', client_id),
        ]

    # NOTE: we do NOT show saved template contents - see addvgridtriggers

    optional_fields = [('rate_limit', None), ('settle_time', None)]

    (status, oobjs) = vgrid_add_remove_table(
        client_id,
        vgrid_name,
        'trigger',
        'vgridtrigger',
        configuration,
        extra_fields,
        optional_fields,
        )
    output_objects.extend(oobjs)

    if not status:
        return (output_objects, returnvalues.SYSTEM_ERROR)

    return (output_objects, returnvalues.OK)
Exemple #52
0
        # Traverse mRSL dir and update cache

        for (root, _, files) in os.walk(root_dir, topdown=True):

            # skip all dot dirs - they are from repos etc and _not_ jobs

            if root.find(os.sep + '.') != -1:
                continue
            for name in files:
                filename = os.path.join(root, name)

                # Only files modified since the last update are checked

                if os.path.getmtime(filename) > last_buildtime:
                    job_dict = unpickle(filename, self.__logger)
                    if not job_dict:
                        msg = 'gridstat::update() could not load: %s '\
                             % filename
                        self.__logger.error(msg)
                        continue

                    job_vgrids = validated_vgrid_list(self.__configuration,
                                                      job_dict)

                    for job_vgrid_name in job_vgrids:

                        # Update the statistics and cache
                        # from the job details

                        job_vgrid_name = job_vgrid_name.upper()
Exemple #53
0
def create_monitor(vgrid_name):
    """Write monitor HTML file for vgrid_name"""

    html_file = os.path.join(configuration.vgrid_home, vgrid_name,
                             '%s.html' % configuration.vgrid_monitor)

    print 'collecting statistics for VGrid %s' % vgrid_name
    sleep_secs = configuration.sleep_secs
    slackperiod = configuration.slackperiod
    now = time.asctime(time.localtime())

    html_vars = {
        'sleep_secs': sleep_secs,
        'vgrid_name': vgrid_name,
        'logo_url': '/images/logo.jpg',
        'now': now,
        'short_title': configuration.short_title,
    }

    monitor_meta = '''<meta http-equiv="refresh" content="%(sleep_secs)s" />
''' % html_vars
    add_import = '''
<script type="text/javascript" src="/images/js/jquery.tablesorter.js"></script>
    '''
    add_init = ''
    add_ready = '''
          // table initially sorted by col. 1 (name)
          var sortOrder = [[1,0]];

          // use image path for sorting if there is any inside
          var imgTitle = function(contents) {
              var key = $(contents).find("a").attr("class");
              if (key == null) {
                  key = $(contents).html();
              }
              return key;
          }
          $("table.monitor").tablesorter({widgets: ["zebra"],
                                          textExtraction: imgTitle,
                                         });
          $("table.monitor").each(function () {
              try {
                  $(this).trigger("sorton", [sortOrder]);
              } catch(err) {
                  /* tablesorter chokes on empty tables - just continue */
              }
          });
    '''
    monitor_js = '''
%s

<script type="text/javascript" >

%s

$(document).ready(function() {
%s
          }
);
</script>
''' % (add_import, add_init, add_ready)

    # User default site style
    style_helpers = themed_styles(configuration)
    script_helpers = themed_scripts(configuration)
    script_helpers['advanced'] += add_import
    script_helpers['init'] += add_init
    script_helpers['ready'] += add_ready
    html = get_xgi_html_header(
        configuration,
        '%(short_title)s Monitor, VGrid %(vgrid_name)s' % html_vars,
        '',
        html=True,
        meta=monitor_meta,
        style_map=style_helpers,
        script_map=script_helpers,
        frame=False,
        menu=False,
        widgets=False,
        userstyle=False,
    )
    html += \
        '''
<!-- end of raw header: this line is used by showvgridmonitor -->
<h1>Statistics/monitor for the %(vgrid_name)s VGrid</h1>
<div class="generatornote smallcontent">
This page was generated %(now)s (automatic refresh every %(sleep_secs)s secs).
</div>
'''\
         % html_vars

    # loop and get totals

    parse_count = 0
    queued_count = 0
    frozen_count = 0
    executing_count = 0
    finished_count = 0
    failed_count = 0
    retry_count = 0
    canceled_count = 0

    cpucount_requested = 0
    cpucount_done = 0
    nodecount_requested = 0
    nodecount_done = 0
    cputime_requested = 0
    cputime_done = 0
    used_walltime = 0
    disk_requested = 0
    disk_done = 0
    memory_requested = 0
    memory_done = 0
    runtimeenv_dict = {'': 0}
    runtimeenv_requested = 0
    runtimeenv_done = 0

    number_of_jobs = 0
    up_count = 0
    down_count = 0
    slack_count = 0

    job_assigned = 0
    job_assigned_cpus = 0

    gstat = GridStat(configuration, logger)

    runtimeenv_dict = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                      'RUNTIMEENVIRONMENT', {})

    parse_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'PARSE')
    queued_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'QUEUED')
    frozen_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'FROZEN')
    executing_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                      'EXECUTING')
    failed_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'FAILED')
    retry_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'RETRY')
    canceled_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                     'CANCELED')
    expired_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'EXPIRED')
    finished_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                     'FINISHED')

    nodecount_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                          'NODECOUNT_REQ')
    nodecount_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                     'NODECOUNT_DONE')
    cputime_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                        'CPUTIME_REQ')
    cputime_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                   'CPUTIME_DONE')

    used_walltime = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                    'USED_WALLTIME')

    if (used_walltime == 0):
        used_walltime = datetime.timedelta(0)

    used_walltime = format_timedelta(used_walltime)

    disk_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                     'DISK_REQ')
    disk_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), 'DISK_DONE')
    memory_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                       'MEMORY_REQ')
    memory_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                  'MEMORY_DONE')
    cpucount_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                         'CPUCOUNT_REQ')
    cpucount_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                    'CPUCOUNT_DONE')
    runtimeenv_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                           'RUNTIMEENVIRONMENT_REQ')
    runtimeenv_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(),
                                      'RUNTIMEENVIRONMENT_DONE')

    number_of_jobs = parse_count
    number_of_jobs += queued_count
    number_of_jobs += frozen_count
    number_of_jobs += expired_count
    number_of_jobs += canceled_count
    number_of_jobs += failed_count
    number_of_jobs += executing_count
    number_of_jobs += finished_count
    number_of_jobs += retry_count

    html_vars = {
        'parse_count': parse_count,
        'queued_count': queued_count,
        'frozen_count': frozen_count,
        'executing_count': executing_count,
        'failed_count': failed_count,
        'retry_count': retry_count,
        'canceled_count': canceled_count,
        'expired_count': expired_count,
        'finished_count': finished_count,
        'number_of_jobs': number_of_jobs,
        'cpucount_requested': cpucount_requested,
        'cpucount_done': cpucount_done,
        'nodecount_requested': nodecount_requested,
        'nodecount_done': nodecount_done,
        'cputime_requested': cputime_requested,
        'cputime_done': cputime_done,
        'used_walltime': used_walltime,
        'disk_requested': disk_requested,
        'disk_done': disk_done,
        'memory_requested': memory_requested,
        'memory_done': memory_done,
        'runtimeenv_requested': runtimeenv_requested,
        'runtimeenv_done': runtimeenv_done,
    }

    html += \
        """<h2>Job Stats</h2><table class=monitorstats><tr><td>
<table class=monitorjobs><tr class=title><td>Job State</td><td>Number of jobs</td></tr>
<tr><td>Parse</td><td>%(parse_count)s</td></tr>
<tr><td>Queued</td><td>%(queued_count)s</td></tr>
<tr><td>Frozen</td><td>%(frozen_count)s</td></tr>
<tr><td>Executing</td><td>%(executing_count)s</td></tr>
<tr><td>Failed</td><td>%(failed_count)s</td></tr>
<tr><td>Retry</td><td>%(retry_count)s</td></tr>
<tr><td>Canceled</td><td>%(canceled_count)s</td></tr>
<tr><td>Expired</td><td>%(expired_count)s</td></tr>
<tr><td>Finished</td><td>%(finished_count)s</td></tr>
<tr><td>Total</td><td>%(number_of_jobs)s</td></tr>
</table>
</td><td>
<table class=monitorresreq>
<tr class=title><td>Requirement</td><td>Requested</td><td>Done</td></tr>
<tr><td>Cpucount</td><td>%(cpucount_requested)s</td><td>%(cpucount_done)s</td></tr>
<tr><td>Nodecount</td><td>%(nodecount_requested)s</td><td>%(nodecount_done)s</td></tr>
<tr><td>Cputime</td><td>%(cputime_requested)s</td><td>%(cputime_done)s</td></tr>
<tr><td>GB Disk</td><td>%(disk_requested)s</td><td>%(disk_done)s</td></tr>
<tr><td>MB Memory</td><td>%(memory_requested)s</td><td>%(memory_done)s</td></tr>
<tr><td>Runtime Envs</td><td>%(runtimeenv_requested)s</td><td>%(runtimeenv_done)s</td></tr>
<tr><td>Used Walltime</td><td colspan='2'>%(used_walltime)s</td></tr>
</table><br />
</td><td>
<div class=monitorruntimeenvdetails>
<table class=monitorruntimeenvdone>
<tr class=title><td>Runtime Envs Done</td><td></td></tr>
"""\
         % html_vars

    if len(runtimeenv_dict.keys()) < 1:

        # No runtimeenv requests

        html += '<tr><td></td><td>-</td></tr>\n'
    else:
        for entry in runtimeenv_dict.keys():
            if not entry == '':
                html += '<tr><td>' + entry + '</td><td>'\
                    + str(runtimeenv_dict[entry]) + '</td></tr>\n'

    total_number_of_exe_resources, total_number_of_store_resources = 0, 0
    total_number_of_exe_cpus, total_number_of_store_gigs = 0, 0

    vgrid_name_list = vgrid_name.split('/')
    current_dir = ''

    exes, stores = '', ''
    for vgrid_name_part in vgrid_name_list:
        current_dir = os.path.join(current_dir, vgrid_name_part)
        abs_mon_dir = os.path.join(configuration.vgrid_home, current_dir)
        # print 'dir: %s' % abs_mon_dir
        # Potential race - just ignore if it disappeared
        try:
            sorted_names = os.listdir(abs_mon_dir)
        except OSError:
            continue
        sorted_names.sort()
        for filename in sorted_names:
            # print filename
            if filename.startswith('monitor_last_request_'):

                # read last request helper file

                mon_file_name = os.path.join(abs_mon_dir, filename)
                print 'found ' + mon_file_name
                last_request_dict = unpickle(mon_file_name, logger)
                if not last_request_dict:
                    print 'could not open and unpickle: '\
                        + mon_file_name
                    continue
                if not last_request_dict.has_key('CREATED_TIME'):
                    print 'skip broken last request dict: '\
                        + mon_file_name
                    continue

                difference = datetime.datetime.now()\
                    - last_request_dict['CREATED_TIME']
                days = str(difference.days)
                hours = str(difference.seconds / 3600)
                minutes = str((difference.seconds % 3600) / 60)
                seconds = str((difference.seconds % 60) % 60)

                last_timetuple = last_request_dict['CREATED_TIME'].timetuple()

                if last_request_dict.has_key('CPUTIME'):
                    cputime = last_request_dict['CPUTIME']
                elif last_request_dict.has_key('cputime'):
                    cputime = last_request_dict['cputime']
                else:
                    print 'ERROR: last request does not contain cputime field!: %s'\
                        % last_request_dict
                    continue

                try:
                    cpusec = int(cputime)
                except ValueError:
                    try:
                        cpusec = int(float(cputime))
                    except ValueError, verr:
                        print 'ERROR: failed to parse cputime %s: %s'\
                            % (cputime, verr)
                        # skip this resource: cpusec would otherwise be
                        # undefined (or stale) further down
                        continue

                # Include execution delay guesstimate for strict fill
                # LRMS resources

                try:
                    delay = int(last_request_dict['EXECUTION_DELAY'])
                except KeyError:
                    delay = 0
                except ValueError:
                    delay = 0

                time_remaining = (last_request_dict['CREATED_TIME']
                                  + datetime.timedelta(seconds=cpusec)
                                  + datetime.timedelta(seconds=delay))\
                    - datetime.datetime.now()
                days_rem = str(time_remaining.days)
                hours_rem = str(time_remaining.seconds / 3600)
                minutes_rem = str((time_remaining.seconds % 3600) / 60)
                seconds_rem = str((time_remaining.seconds % 60) % 60)

                if time_remaining.days < -7:
                    try:
                        print 'removing: %s as we havent seen him for %s days.'\
                            % (mon_file_name, abs(time_remaining).days)
                        os.remove(mon_file_name)
                    except Exception, err:
                        print "could not remove: '%s' Error: %s"\
                            % (mon_file_name, str(err))
                    pass
                else:
                    unique_res_name_and_exe_list = \
                        filename.split('monitor_last_request_', 1)
                    if cpusec == 0:
                        resource_status = 'unavailable'
                    elif time_remaining.days < 0:

                        # time_remaining.days < 0 means that we have passed the specified time

                        time_rem_abs = abs(time_remaining)
                        if time_rem_abs.days == 0\
                                and int(time_rem_abs.seconds)\
                                < int(slackperiod):
                            resource_status = 'slack'
                            slack_count = slack_count + 1
                        else:
                            resource_status = 'offline'
                            down_count = down_count + 1
                    else:
                        resource_status = 'online'
                        up_count = up_count + 1

                    exes += '<tr>'
                    exes += \
                        '<td><img src=/images/status-icons/%s.png /></td>'\
                        % resource_status
                    public_id = unique_res_name_and_exe_list[1]
                    if last_request_dict['RESOURCE_CONFIG'].get(
                            'ANONYMOUS', True):
                        public_id = anon_resource_id(public_id)
                    public_name = last_request_dict['RESOURCE_CONFIG'].get(
                        'PUBLICNAME', '')
                    resource_parts = public_id.split('_', 2)
                    resource_name = "<a href='viewres.py?unique_resource_name=%s'>%s</a>" % \
                                    (resource_parts[0], resource_parts[0])
                    if public_name:
                        resource_name += "<br />(alias %s)" % public_name
                    else:
                        resource_name += "<br />(no alias)"
                    resource_name += "<br />%s" % resource_parts[1]
                    exes += '<td>%s</td>' % resource_name

                    last_asctime = time.asctime(last_timetuple)
                    last_epoch = time.mktime(last_timetuple)
                    exes += '<td><div class="sortkey">%s</div>%s<br />' %  \
                            (last_epoch, last_asctime)
                    exes += '(%sd %sh %sm %ss ago)</td>' % (days, hours,
                                                            minutes, seconds)
                    exes += '<td>' + vgrid_name + '</td>'
                    runtime_envs = last_request_dict['RESOURCE_CONFIG'][
                        'RUNTIMEENVIRONMENT']
                    runtime_envs.sort()
                    re_list_text = ', '.join([i[0] for i in runtime_envs])
                    exes += '<td title="%s">' % re_list_text \
                        + str(len(runtime_envs)) + '</td>'
                    exes += '<td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['CPUTIME']) + '</td><td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['NODECOUNT']) + '</td><td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['CPUCOUNT']) + '</td><td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['DISK']) + '</td><td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['MEMORY']) + '</td><td>'\
                        + str(last_request_dict['RESOURCE_CONFIG'
                                                ]['ARCHITECTURE']) + '</td>'
                    exes += '<td>' + last_request_dict['STATUS']\
                        + '</td><td>' + str(last_request_dict['CPUTIME'
                                                              ]) + '</td>'

                    exes += '<td class=status_%s>' % resource_status
                    if 'unavailable' == resource_status:
                        exes += '-'
                    elif 'slack' == resource_status:
                        exes += 'Within slack period (%s < %s secs)'\
                            % (time_rem_abs.seconds, slackperiod)
                    elif 'offline' == resource_status:
                        exes += 'down?'
                    else:
                        exes += '%sd, %sh, %sm, %ss'\
                            % (days_rem, hours_rem, minutes_rem,
                                seconds_rem)
                    exes += '</td>'

                    exes += '</tr>\n'
                    if last_request_dict['STATUS'] == 'Job assigned':
                        job_assigned = job_assigned + 1
                        job_assigned_cpus = job_assigned_cpus\
                            + int(last_request_dict['RESOURCE_CONFIG'
                                                    ]['NODECOUNT'])\
                            * int(last_request_dict['RESOURCE_CONFIG'
                                                    ]['CPUCOUNT'])

                    total_number_of_exe_resources += 1
                    total_number_of_exe_cpus += int(
                        last_request_dict['RESOURCE_CONFIG']['NODECOUNT']) \
                        * int(last_request_dict['RESOURCE_CONFIG']['CPUCOUNT'])
            elif filename.startswith('monitor_last_status_'):

                # store must be linked to this vgrid, not only parent vgrid:
                # inheritance only covers access, not automatic participation

                if current_dir != vgrid_name:
                    continue

                # read last resource action status file

                mon_file_name = os.path.join(abs_mon_dir, filename)
                print 'found ' + mon_file_name
                last_status_dict = unpickle(mon_file_name, logger)
                if not last_status_dict:
                    print 'could not open and unpickle: '\
                        + mon_file_name
                    continue
                if not last_status_dict.has_key('CREATED_TIME'):
                    print 'skip broken last request dict: '\
                        + mon_file_name
                    continue

                difference = datetime.datetime.now()\
                    - last_status_dict['CREATED_TIME']
                days = str(difference.days)
                hours = str(difference.seconds / 3600)
                minutes = str((difference.seconds % 3600) / 60)
                seconds = str((difference.seconds % 60) % 60)

                if last_status_dict['STATUS'] == 'stopped':
                    time_stopped = datetime.datetime.now() - \
                        last_status_dict['CREATED_TIME']
                    if time_stopped.days > 7:
                        try:
                            print 'removing: %s as we havent seen him for %s days.'\
                                  % (mon_file_name, abs(time_stopped).days)
                            os.remove(mon_file_name)
                        except Exception, err:
                            print "could not remove: '%s' Error: %s"\
                                  % (mon_file_name, str(err))
                        continue
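The resource liveness states used above (online, slack, offline, unavailable) follow directly from the last request's CREATED_TIME, its promised CPUTIME and the optional EXECUTION_DELAY. Below is a condensed, self-contained sketch of that classification, assuming the same field names as the last-request dictionary; the helper name itself is made up:

import datetime

def classify_resource_status(last_request, slackperiod):
    """Condensed sketch of the monitor liveness check (field names as above)."""
    cpusec = int(float(last_request['CPUTIME']))
    delay = int(last_request.get('EXECUTION_DELAY', 0))
    if cpusec == 0:
        return 'unavailable'
    expected_end = last_request['CREATED_TIME'] + \
        datetime.timedelta(seconds=cpusec + delay)
    time_remaining = expected_end - datetime.datetime.now()
    if time_remaining.days >= 0:
        return 'online'
    overdue = abs(time_remaining)
    if overdue.days == 0 and overdue.seconds < int(slackperiod):
        return 'slack'
    return 'offline'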
Exemple #54
0
def check_mrsl_files(
    configuration,
    job_queue,
    executing_queue,
    only_new,
    logger,
    ):
    """Check job files on disk in order to initialize job queue after
    (re)start of grid_script.
    """

    # We only check files modified since last start if possible

    last_start = 0
    last_start_file = os.path.join(configuration.mig_system_files,
                                   'grid_script_laststart')
    if os.path.exists(last_start_file):
        last_start = os.path.getmtime(last_start_file)

    check_mrsl_files_start_time = time.time()

    # TODO: switch to listdir or glob? all files are in mrsl_files_dir/*/*.mRSL
    
    for (root, _, files) in os.walk(configuration.mrsl_files_dir):

        # skip all dot dirs - they are from repos etc and _not_ jobs

        if root.find(os.sep + '.') != -1:
            continue

        # skip all dirs without any recent changes

        if only_new and os.path.getmtime(root) < last_start:
            logger.info('check mRSL files: skipping unchanged dir: %s' % root)
            continue
        
        logger.info('check mRSL files: inspecting %d files in %s' % \
                    (len(files), root))
        file_count = 0
        for name in files:
            filename = os.path.join(root, name)
            file_count += 1
            if file_count % 1000 == 0:
                logger.info('check mRSL files: %d files in %s checked' % \
                            (file_count, root))
            if os.path.getmtime(filename) < last_start:
                if only_new:
                    #logger.debug('skipping treated mrsl file: %s'
                    #             % filename)
                    continue
                logger.info('parsing possibly outdated mrsl file: %s'
                             % filename)

            job_dict = io.unpickle(filename, logger)
            if not job_dict:
                logger.error('could not open and unpickle: %s' % filename)
                continue

            if job_dict['STATUS'] == 'PARSE':

                # parse is ok, since mRSL file exists
                # tell 'grid_script' and let grid_script put it into the queue

                logger.info('Found a file with PARSE status: %s'
                             % job_dict['JOB_ID'])
                job_id = job_dict['JOB_ID']
                client_id = job_dict['USER_CERT']
                client_dir = client_id_dir(client_id)
                message = 'USERJOBFILE %s/%s\n' % (client_dir, job_id)
                if not send_message_to_grid_script(message, logger,
                        configuration):
                    print 'Fatal error: Could not write to grid stdin'
            elif job_dict['STATUS'] == 'QUEUED'\
                 and not job_queue.get_job_by_id(job_dict['JOB_ID']):

                # put in job queue

                logger.info('USERJOBFILE: There were %s jobs in the job_queue'
                             % job_queue.queue_length())
                job_queue.enqueue_job(job_dict,
                        job_queue.queue_length())
                logger.info("Now there's %s (QUEUED job %s added)"
                             % (job_queue.queue_length(),
                            job_dict['JOB_ID']))
            elif job_dict['STATUS'] == 'EXECUTING'\
                 and not executing_queue.get_job_by_id(job_dict['JOB_ID'
                    ]):

                # put in executing queue

                logger.info('USERJOBFILE: There were %s jobs in the executing_queue'
                             % executing_queue.queue_length())
                executing_queue.enqueue_job(job_dict,
                        executing_queue.queue_length())
                logger.info("Now there's %s (EXECUTING job %s added)"
                             % (executing_queue.queue_length(),
                            job_dict['JOB_ID']))
            else:
                # logger.debug('Job in %s is already treated' % filename)
                continue

    # update last_start_file access times. Note the timestamp is not "now" but
    # when check_mrsl_files was called to avoid losing any jobs being parsed
    # at the same time as this function is running.

    logger.info('setting time of last_start_file %s to %s'
                 % (last_start_file, check_mrsl_files_start_time))
    io.touch(last_start_file, check_mrsl_files_start_time)
    check_mrsl_files_end_time = time.time()
    logger.info('finished checking for mRSL files in %fs' % \
                (check_mrsl_files_end_time-check_mrsl_files_start_time))
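The only_new optimisation above boils down to an mtime cut-off against the grid_script_laststart marker file. A minimal sketch of that skip pattern on its own, with placeholder paths rather than the real configuration values:

import os

def files_changed_since(top_dir, marker_file):
    """Yield file paths under top_dir modified after the marker file's mtime."""
    last_start = 0
    if os.path.exists(marker_file):
        last_start = os.path.getmtime(marker_file)
    for (root, _, files) in os.walk(top_dir):
        # dot dirs hold repo metadata and the like - never jobs
        if root.find(os.sep + '.') != -1:
            continue
        # skip whole dirs without recent changes
        if os.path.getmtime(root) < last_start:
            continue
        for name in files:
            path = os.path.join(root, name)
            if os.path.getmtime(path) >= last_start:
                yield path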
Exemple #55
0
def vms_list(client_id, configuration):
    """Returns a list of dicts describing available user virtual machines
    described by the keys from default_vm_specs and the additional fields:
     
    'name', 'status', 'uuid', 'execution_time' and 'path'
 
    NOTE:

      The current state management does not fully exploit the powers of the
      grid, it only allows one 'instance' of the virtual machine to be
      running. But in practice a user could fairly use multiple instances
      of the same virtual machine. Using this basic model is feasible
      since the job submission is controlled via the web interface. It
      will however break if the user manually submits her own job.
      
      Currently two ways of deploying machines to resources exist:
      - By using VirtualBox frontend (work of Tomas)
      - By using webinterface (work of Simon)
      
      The storage of virtual machines are based on xml files (deployed by
      virtualbox)
      or ini files when deployed by MiG. This library supports both and the
      logic is separated into functions where appropriate.
    """

    # Grab the base directory of the user

    client_dir = client_id_dir(client_id)
    user_home = os.path.abspath(os.path.join(configuration.user_home,
                                             client_dir))
    mrsl_files_dir = os.path.abspath(os.path.join(configuration.mrsl_files_dir,
                                                  client_dir))

    # Append the virtual machine directory

    vms_paths = glob(os.path.join(user_home, vm_base, '*', '*.cfg'))

    # List of virtual machines

    vms = []

    for vm_def_path in vms_paths:
        machine = {}
        machine_defaults = default_vm_specs(configuration)
        machine_state = {
            'name': 'UNKNOWN',
            'path': os.path.abspath(vm_def_path),
            'status': 'UNKNOWN',
            'execution_time': 'UNKNOWN',
            'job_id': 'UNKNOWN',
            'uuid': 'UNKNOWN',
            }
        machine.update(machine_defaults)
        machine.update(machine_state)

        # Grab the configuration file defining the machine

        vm_def_base = os.path.basename(os.path.dirname(vm_def_path))

        vm_config = ConfigParser.ConfigParser()
        vm_config.read([vm_def_path])

        machine['name'] = vm_def_base
        # override defaults with conf values
        for key in machine_defaults.keys():
            if vm_config.has_option('MiG', key):
                machine[key] = vm_config.get('MiG', key)
        # vgrid entry must be a list of strings
        if isinstance(machine['vgrid'], basestring):
            machine['vgrid'] = machine['vgrid'].split()

        # All job descriptions associated with this virtual machine

        jobs = []
        match_line = "$VBOXMANAGE -q createvm --name '" + vm_def_base \
            + "' --register"
        # we cannot inspect all mrsl files - filter by year is good guesstimate
        # TODO: mark vms jobs for easy finding without brute force search
        for mrsl_path in glob(os.path.join(mrsl_files_dir, '*_%d_*' % \
                                           datetime.date.today().year)):
            for line in open(os.path.abspath(mrsl_path), 'r', 1):
                if match_line in line:
                    jobs.append(unpickle(mrsl_path, configuration.logger))

        # Base the state on the latest job.
        #
        # Now determine the state of the jobs.
        # Job status can be one of EXECUTING, CANCELED, FAILED, QUEUED,
        # FINISHED, the machine state mapping is:
        # EXECUTING -> Powered On
        # CANCELED/FAILED/FINISHED -> Powered Off
        # QUEUED -> Booting
        #
        # TODO: 3

        if len(jobs) > 0:
            sorted_jobs = sorted(jobs, key=operator.itemgetter('JOB_ID'))
            last = sorted_jobs[-1]
            machine['status'] = last['STATUS']
            if machine['status'] == 'EXECUTING':
                machine['execution_time'] = last['EXECUTING_TIMESTAMP']
            machine['job_id'] = last['JOB_ID']

        vms.append(machine)

    return vms
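The job-status to machine-state mapping spelled out in the comments above (EXECUTING maps to Powered On, QUEUED to Booting, CANCELED/FAILED/FINISHED to Powered Off) can be captured in a tiny helper; the function name and the fallback label are illustrative only:

def vm_power_state(job_status):
    """Map an mRSL job STATUS value to a coarse virtual machine power state."""
    mapping = {
        'EXECUTING': 'Powered On',
        'QUEUED': 'Booting',
        'CANCELED': 'Powered Off',
        'FAILED': 'Powered Off',
        'FINISHED': 'Powered Off',
    }
    return mapping.get(job_status, 'UNKNOWN')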
Exemple #56
0
        return (False, '''Fatal error: Could not get exclusive access or write
to %s''' % configuration.grid_stdin)

    if forceddestination and forceddestination.has_key('RE_NAME'):

        # add job_id to runtime environment verification history

        unique_resource_name = forceddestination['UNIQUE_RESOURCE_NAME']
        re_name = forceddestination['RE_NAME']

        resource_config_filename = configuration.resource_home\
             + unique_resource_name + '/config'

        # open resource config

        resource_config = unpickle(resource_config_filename, logger)
        if not resource_config:
            logger.error('error unpickling resource config')
            return False

        dict_entry = (job_id, client_id)

        # add entry to runtime verification history

        if not resource_config.has_key('RUNTVERIFICATION'):
            resource_config['RUNTVERIFICATION'] = \
                {re_name: [dict_entry]}
        else:
            before_runt_dict = resource_config['RUNTVERIFICATION']
            if not before_runt_dict.has_key(re_name):
                # note: [].append() returns None, so build the list directly
                before_runt_dict[re_name] = [dict_entry]
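Since list.append returns None, the verification history above builds the one-element list explicitly. The same bookkeeping can be written compactly with setdefault; the helper below is only a sketch with a made-up name:

def add_runtime_verification(resource_config, re_name, job_id, client_id):
    """Append (job_id, client_id) to the RUNTVERIFICATION history for re_name."""
    history = resource_config.setdefault('RUNTVERIFICATION', {})
    history.setdefault(re_name, []).append((job_id, client_id))
    return resource_config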
Exemple #57
0
def main(client_id, user_arguments_dict):
    """Main function used by front end"""
    (configuration, logger, output_objects, op_name) = \
        initialize_main_variables(client_id, op_header=False)
    client_dir = client_id_dir(client_id)
    defaults = signature()[1]
    (validate_status, accepted) = validate_input_and_cert(
        user_arguments_dict,
        defaults,
        output_objects,
        client_id,
        configuration,
        allow_rejects=False,
    )

    if not validate_status:
        return (accepted, returnvalues.CLIENT_ERROR)

    logger.debug("User: %s executing %s" % (client_id, op_name))
    if not configuration.site_enable_jupyter:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The Jupyter service is not enabled on the system'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if not configuration.site_enable_sftp_subsys and not \
            configuration.site_enable_sftp:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The required sftp service is not enabled on the system'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    if configuration.site_enable_sftp:
        sftp_port = configuration.user_sftp_port

    if configuration.site_enable_sftp_subsys:
        sftp_port = configuration.user_sftp_subsys_port

    requested_service = accepted['service'][-1]
    service = {
        k: v
        for options in configuration.jupyter_services
        for k, v in options.items()
        if options['service_name'] == requested_service
    }

    if not service:
        valid_services = [
            options['name'] for options in configuration.jupyter_services
        ]
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            '%s is not a valid jupyter service, '
            'allowed include %s' % (requested_service, valid_services)
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    valid_service = valid_jupyter_service(configuration, service)
    if not valid_service:
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'The service %s appears to be misconfigured, '
            'please contact a system administrator about this issue' %
            requested_service
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    host = get_host_from_service(configuration, service)
    # Get an active jupyterhost
    if host is None:
        logger.error("No active jupyterhub host could be found")
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Failed to establish connection to the %s Jupyter service' %
            service['service_name']
        })
        output_objects.append({
            'object_type': 'link',
            'destination': 'jupyter.py',
            'text': 'Back to Jupyter services overview'
        })
        return (output_objects, returnvalues.SYSTEM_ERROR)

    remote_user = unescape(os.environ.get('REMOTE_USER', '')).strip()
    if not remote_user:
        logger.error("Can't connect to jupyter with an empty REMOTE_USER "
                     "environment variable")
        output_objects.append({
            'object_type':
            'error_text',
            'text':
            'Failed to establish connection to the Jupyter service'
        })
        return (output_objects, returnvalues.CLIENT_ERROR)
    # Ensure the remote_user dict can be http posted
    remote_user = str(remote_user)

    # TODO, activate admin info
    # remote_user = {'USER': username, 'IS_ADMIN': is_admin(client_id,
    #                                                      configuration,
    # logger)}

    # Regular sftp path
    mnt_path = os.path.join(configuration.jupyter_mount_files_dir, client_dir)
    # Subsys sftp path
    subsys_path = os.path.join(configuration.mig_system_files, 'jupyter_mount')
    # sftp session path
    link_home = configuration.sessid_to_jupyter_mount_link_home

    user_home_dir = os.path.join(configuration.user_home, client_dir)

    # Preparing prerequisites
    if not os.path.exists(mnt_path):
        os.makedirs(mnt_path)

    if not os.path.exists(link_home):
        os.makedirs(link_home)

    if configuration.site_enable_sftp_subsys:
        if not os.path.exists(subsys_path):
            os.makedirs(subsys_path)

    # Make sure ssh daemon does not complain
    tighten_key_perms(configuration, client_id)

    url_base = '/' + service['service_name']
    url_home = url_base + '/home'
    url_auth = host + url_base + '/hub/login'
    url_data = host + url_base + '/hub/user-data'

    # Does the client home dir contain an active mount key
    # If so just keep on using it.
    jupyter_mount_files = [
        os.path.join(mnt_path, jfile) for jfile in os.listdir(mnt_path)
        if jfile.endswith('.jupyter_mount')
    ]

    logger.info("User: %s mount files: %s" %
                (client_id, "\n".join(jupyter_mount_files)))
    logger.debug("Remote-User %s" % remote_user)
    active_mounts = []
    for jfile in jupyter_mount_files:
        jupyter_dict = unpickle(jfile, logger)
        if not jupyter_dict:
            # Remove failed unpickle
            logger.error("Failed to unpickle %s removing it" % jfile)
            remove_jupyter_mount(jfile, configuration)
        else:
            # Mount has been timed out
            if not is_active(jupyter_dict):
                remove_jupyter_mount(jfile, configuration)
            else:
                # Valid mount
                active_mounts.append({'path': jfile, 'state': jupyter_dict})

    logger.debug(
        "User: %s active keys: %s" %
        (client_id, "\n".join([mount['path'] for mount in active_mounts])))

    # If multiple are active, remove oldest
    active_mount, old_mounts = get_newest_mount(active_mounts)
    for mount in old_mounts:
        remove_jupyter_mount(mount['path'], configuration)

    # A valid active key is already present redirect straight to the jupyter
    # service, pass most recent mount information
    if active_mount is not None:
        mount_dict = mig_to_mount_adapt(active_mount['state'])
        user_dict = mig_to_user_adapt(active_mount['state'])
        logger.debug("Existing header values, Mount: %s User: %s" %
                     (mount_dict, user_dict))

        auth_header = {'Remote-User': remote_user}
        json_data = {'data': {'Mount': mount_dict, 'User': user_dict}}

        if configuration.site_enable_workflows:
            workflows_dict = mig_to_workflows_adapt(active_mount['state'])
            if not workflows_dict:
                # No cached workflows session could be found -> refresh with a
                # new one
                workflow_session_id = get_workflow_session_id(
                    configuration, client_id)
                if not workflow_session_id:
                    workflow_session_id = create_workflow_session_id(
                        configuration, client_id)
                # TODO get this dynamically
                url = configuration.migserver_https_sid_url + \
                    '/cgi-sid/workflowsjsoninterface.py?output_format=json'
                workflows_dict = {
                    'WORKFLOWS_URL': url,
                    'WORKFLOWS_SESSION_ID': workflow_session_id
                }

            logger.debug("Existing header values, Workflows: %s" %
                         workflows_dict)
            json_data['workflows_data'] = {'Session': workflows_dict}

        with requests.session() as session:
            # Authenticate and submit data
            response = session.post(url_auth, headers=auth_header)
            if response.status_code == 200:
                response = session.post(url_data, json=json_data)
                if response.status_code != 200:
                    logger.error(
                        "Jupyter: User %s failed to submit data %s to %s" %
                        (client_id, json_data, url_data))
            else:
                logger.error(
                    "Jupyter: User %s failed to authenticate against %s" %
                    (client_id, url_auth))

        # Redirect client to jupyterhub
        return jupyter_host(configuration, output_objects, remote_user,
                            url_home)

    # Create a new keyset
    # Create login session id
    session_id = generate_random_ascii(2 * session_id_bytes,
                                       charset='0123456789abcdef')

    # Generate private/public keys
    (mount_private_key,
     mount_public_key) = generate_ssh_rsa_key_pair(encode_utf8=True)

    # Known hosts
    sftp_addresses = socket.gethostbyname_ex(
        configuration.user_sftp_show_address or socket.getfqdn())

    # Subsys sftp support
    if configuration.site_enable_sftp_subsys:
        # Restrict possible mount agent
        auth_content = []
        restrict_opts = 'no-agent-forwarding,no-port-forwarding,no-pty,'
        restrict_opts += 'no-user-rc,no-X11-forwarding'
        restrictions = '%s' % restrict_opts
        auth_content.append('%s %s\n' % (restrictions, mount_public_key))
        # Write auth file
        write_file('\n'.join(auth_content),
                   os.path.join(subsys_path, session_id + '.authorized_keys'),
                   logger,
                   umask=027)

    logger.debug("User: %s - Creating a new jupyter mount keyset - "
                 "private_key: %s public_key: %s " %
                 (client_id, mount_private_key, mount_public_key))

    jupyter_dict = {
        'MOUNT_HOST': configuration.short_title,
        'SESSIONID': session_id,
        'USER_CERT': client_id,
        # don't need fraction precision, also not all systems provide fraction
        # precision.
        'CREATED_TIMESTAMP': int(time.time()),
        'MOUNTSSHPRIVATEKEY': mount_private_key,
        'MOUNTSSHPUBLICKEY': mount_public_key,
        # Used by the jupyterhub to know which host to mount against
        'TARGET_MOUNT_ADDR': "@" + sftp_addresses[0] + ":",
        'PORT': sftp_port
    }
    client_email = extract_field(client_id, 'email')
    if client_email:
        jupyter_dict.update({'USER_EMAIL': client_email})

    if configuration.site_enable_workflows:
        workflow_session_id = get_workflow_session_id(configuration, client_id)
        if not workflow_session_id:
            workflow_session_id = create_workflow_session_id(
                configuration, client_id)
        # TODO get this dynamically
        url = configuration.migserver_https_sid_url + \
            '/cgi-sid/workflowsjsoninterface.py?output_format=json'
        jupyter_dict.update({
            'WORKFLOWS_URL': url,
            'WORKFLOWS_SESSION_ID': workflow_session_id
        })

    # Only post the required keys, adapt to API expectations
    mount_dict = mig_to_mount_adapt(jupyter_dict)
    user_dict = mig_to_user_adapt(jupyter_dict)
    workflows_dict = mig_to_workflows_adapt(jupyter_dict)
    logger.debug("User: %s Mount header: %s" % (client_id, mount_dict))
    logger.debug("User: %s User header: %s" % (client_id, user_dict))
    if workflows_dict:
        logger.debug("User: %s Workflows header: %s" %
                     (client_id, workflows_dict))

    # Auth and pass a new set of valid mount keys
    auth_header = {'Remote-User': remote_user}
    json_data = {'data': {'Mount': mount_dict, 'User': user_dict}}
    if workflows_dict:
        json_data['workflows_data'] = {'Session': workflows_dict}

    # First login
    with requests.session() as session:
        # Authenticate
        response = session.post(url_auth, headers=auth_header)
        if response.status_code == 200:
            response = session.post(url_data, json=json_data)
            if response.status_code != 200:
                logger.error(
                    "Jupyter: User %s failed to submit data %s to %s" %
                    (client_id, json_data, url_data))
        else:
            logger.error("Jupyter: User %s failed to authenticate against %s" %
                         (client_id, url_auth))

    # Update pickle with the new valid key
    jupyter_mount_state_path = os.path.join(mnt_path,
                                            session_id + '.jupyter_mount')

    pickle(jupyter_dict, jupyter_mount_state_path, logger)

    # Link jupyter pickle state file
    linkdest_new_jupyter_mount = os.path.join(mnt_path,
                                              session_id + '.jupyter_mount')

    linkloc_new_jupyter_mount = os.path.join(link_home,
                                             session_id + '.jupyter_mount')
    make_symlink(linkdest_new_jupyter_mount, linkloc_new_jupyter_mount, logger)

    # Link userhome
    linkloc_user_home = os.path.join(link_home, session_id)
    make_symlink(user_home_dir, linkloc_user_home, logger)

    return jupyter_host(configuration, output_objects, remote_user, url_home)
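The hub handshake above (authenticate, then post the mount/user JSON) is performed twice with identical structure. A sketch of a shared helper for it, using the same requests calls; the function name and the boolean return convention are assumptions, not part of the original code:

import requests

def push_jupyter_session_data(url_auth, url_data, auth_header, json_data, logger):
    """Authenticate against the hub, then post the session data; log any failure."""
    with requests.session() as session:
        response = session.post(url_auth, headers=auth_header)
        if response.status_code != 200:
            logger.error("failed to authenticate against %s" % url_auth)
            return False
        response = session.post(url_data, json=json_data)
        if response.status_code != 200:
            logger.error("failed to submit data to %s" % url_data)
            return False
    return True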
Exemple #58
0
def create_monitor(vgrid_name):
    """Write monitor HTML file for vgrid_name"""

    html_file = os.path.join(configuration.vgrid_home, vgrid_name, "%s.html" % configuration.vgrid_monitor)

    print "collecting statistics for VGrid %s" % vgrid_name
    sleep_secs = configuration.sleep_secs
    slackperiod = configuration.slackperiod
    now = time.asctime(time.localtime())

    html_vars = {
        "sleep_secs": sleep_secs,
        "vgrid_name": vgrid_name,
        "logo_url": "/images/logo.jpg",
        "now": now,
        "short_title": configuration.short_title,
    }

    html = get_cgi_html_header(
        configuration,
        "%(short_title)s Monitor, VGrid %(vgrid_name)s" % html_vars,
        "",
        True,
        """<meta http-equiv="refresh" content="%(sleep_secs)s" />
        """
        % html_vars,
        themed_styles(configuration),
        """
<script type="text/javascript" src="/images/js/jquery.js"></script>
<script type="text/javascript" src="/images/js/jquery.tablesorter.js"></script>

<script type="text/javascript" >

$(document).ready(function() {

          // table initially sorted by col. 1 (name)
          var sortOrder = [[1,0]];

          // use image path for sorting if there is any inside
          var imgTitle = function(contents) {
              var key = $(contents).find("a").attr("class");
              if (key == null) {
                  key = $(contents).html();
              }
              return key;
          }
          $("table.monitor").tablesorter({widgets: ["zebra"],
                                          textExtraction: imgTitle,
                                         });
          $("table.monitor").each(function () {
              try {
                  $(this).trigger("sorton", [sortOrder]);
              } catch(err) {
                  /* tablesorter chokes on empty tables - just continue */
              }
          });
     }
);
</script>
        """,
        "",
        False,
    )
    html += (
        """
<!-- end of raw header: this line is used by showvgridmonitor -->
<h1>Statistics/monitor for the %(vgrid_name)s VGrid</h1>
<div class="generatornote smallcontent">
This page was generated %(now)s (automatic refresh every %(sleep_secs)s secs).
</div>
"""
        % html_vars
    )

    # loop and get totals

    parse_count = 0
    queued_count = 0
    frozen_count = 0
    executing_count = 0
    finished_count = 0
    failed_count = 0
    retry_count = 0
    canceled_count = 0

    cpucount_requested = 0
    cpucount_done = 0
    nodecount_requested = 0
    nodecount_done = 0
    cputime_requested = 0
    cputime_done = 0
    used_walltime = 0
    disk_requested = 0
    disk_done = 0
    memory_requested = 0
    memory_done = 0
    runtimeenv_dict = {"": 0}
    runtimeenv_requested = 0
    runtimeenv_done = 0

    number_of_jobs = 0
    up_count = 0
    down_count = 0
    slack_count = 0

    job_assigned = 0
    job_assigned_cpus = 0

    gstat = GridStat(configuration, logger)

    runtimeenv_dict = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "RUNTIMEENVIRONMENT", {})

    parse_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "PARSE")
    queued_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "QUEUED")
    frozen_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "FROZEN")
    executing_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "EXECUTING")
    failed_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "FAILED")
    retry_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "RETRY")
    canceled_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "CANCELED")
    expired_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "EXPIRED")
    finished_count = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "FINISHED")

    nodecount_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "NODECOUNT_REQ")
    nodecount_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "NODECOUNT_DONE")
    cputime_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "CPUTIME_REQ")
    cputime_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "CPUTIME_DONE")

    used_walltime = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "USED_WALLTIME")

    if used_walltime == 0:
        used_walltime = datetime.timedelta(0)

    used_walltime = format_timedelta(used_walltime)

    disk_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "DISK_REQ")
    disk_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "DISK_DONE")
    memory_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "MEMORY_REQ")
    memory_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "MEMORY_DONE")
    cpucount_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "CPUCOUNT_REQ")
    cpucount_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "CPUCOUNT_DONE")
    runtimeenv_requested = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "RUNTIMEENVIRONMENT_REQ")
    runtimeenv_done = gstat.get_value(gstat.VGRID, vgrid_name.upper(), "RUNTIMEENVIRONMENT_DONE")

    number_of_jobs = parse_count
    number_of_jobs += queued_count
    number_of_jobs += frozen_count
    number_of_jobs += expired_count
    number_of_jobs += canceled_count
    number_of_jobs += failed_count
    number_of_jobs += executing_count
    number_of_jobs += finished_count
    number_of_jobs += retry_count

    html_vars = {
        "parse_count": parse_count,
        "queued_count": queued_count,
        "frozen_count": frozen_count,
        "executing_count": executing_count,
        "failed_count": failed_count,
        "retry_count": retry_count,
        "canceled_count": canceled_count,
        "expired_count": expired_count,
        "finished_count": finished_count,
        "number_of_jobs": number_of_jobs,
        "cpucount_requested": cpucount_requested,
        "cpucount_done": cpucount_done,
        "nodecount_requested": nodecount_requested,
        "nodecount_done": nodecount_done,
        "cputime_requested": cputime_requested,
        "cputime_done": cputime_done,
        "used_walltime": used_walltime,
        "disk_requested": disk_requested,
        "disk_done": disk_done,
        "memory_requested": memory_requested,
        "memory_done": memory_done,
        "runtimeenv_requested": runtimeenv_requested,
        "runtimeenv_done": runtimeenv_done,
    }

    html += (
        """<h2>Job Stats</h2><table class=monitorstats><tr><td>
<table class=monitorjobs><tr class=title><td>Job State</td><td>Number of jobs</td></tr>
<tr><td>Parse</td><td>%(parse_count)s</td></tr>
<tr><td>Queued</td><td>%(queued_count)s</td></tr>
<tr><td>Frozen</td><td>%(frozen_count)s</td></tr>
<tr><td>Executing</td><td>%(executing_count)s</td></tr>
<tr><td>Failed</td><td>%(failed_count)s</td></tr>
<tr><td>Retry</td><td>%(retry_count)s</td></tr>
<tr><td>Canceled</td><td>%(canceled_count)s</td></tr>
<tr><td>Expired</td><td>%(expired_count)s</td></tr>
<tr><td>Finished</td><td>%(finished_count)s</td></tr>
<tr><td>Total</td><td>%(number_of_jobs)s</td></tr>
</table>
</td><td>
<table class=monitorresreq>
<tr class=title><td>Requirement</td><td>Requested</td><td>Done</td></tr>
<tr><td>Cpucount</td><td>%(cpucount_requested)s</td><td>%(cpucount_done)s</td></tr>
<tr><td>Nodecount</td><td>%(nodecount_requested)s</td><td>%(nodecount_done)s</td></tr>
<tr><td>Cputime</td><td>%(cputime_requested)s</td><td>%(cputime_done)s</td></tr>
<tr><td>GB Disk</td><td>%(disk_requested)s</td><td>%(disk_done)s</td></tr>
<tr><td>MB Memory</td><td>%(memory_requested)s</td><td>%(memory_done)s</td></tr>
<tr><td>Runtime Envs</td><td>%(runtimeenv_requested)s</td><td>%(runtimeenv_done)s</td></tr>
<tr><td>Used Walltime</td><td colspan='2'>%(used_walltime)s</td></tr>
</table><br />
</td><td>
<div class=monitorruntimeenvdetails>
<table class=monitorruntimeenvdone>
<tr class=title><td>Runtime Envs Done</td><td></td></tr>
"""
        % html_vars
    )

    if len(runtimeenv_dict.keys()) < 1:

        # No runtimeenv requests

        html += "<tr><td></td><td>-</td></tr>\n"
    else:
        for entry in runtimeenv_dict.keys():
            if not entry == "":
                html += "<tr><td>" + entry + "</td><td>" + str(runtimeenv_dict[entry]) + "</td></tr>\n"

    total_number_of_exe_resources, total_number_of_store_resources = 0, 0
    total_number_of_exe_cpus, total_number_of_store_gigs = 0, 0

    vgrid_name_list = vgrid_name.split("/")
    current_dir = ""

    exes, stores = "", ""
    for vgrid_name_part in vgrid_name_list:
        current_dir = os.path.join(current_dir, vgrid_name_part)
        abs_mon_dir = os.path.join(configuration.vgrid_home, current_dir)
        # print 'dir: %s' % abs_mon_dir
        # Potential race - just ignore if it disappeared
        try:
            sorted_names = os.listdir(abs_mon_dir)
        except OSError:
            continue
        sorted_names.sort()
        for filename in sorted_names:
            # print filename
            if filename.startswith("monitor_last_request_"):

                # read last request helper file

                mon_file_name = os.path.join(abs_mon_dir, filename)
                print "found " + mon_file_name
                last_request_dict = unpickle(mon_file_name, logger)
                if not last_request_dict:
                    print "could not open and unpickle: " + mon_file_name
                    continue

                difference = datetime.datetime.now() - last_request_dict["CREATED_TIME"]
                days = str(difference.days)
                hours = str(difference.seconds / 3600)
                minutes = str((difference.seconds % 3600) / 60)
                seconds = str((difference.seconds % 60) % 60)

                if last_request_dict.has_key("CPUTIME"):
                    cputime = last_request_dict["CPUTIME"]
                elif last_request_dict.has_key("cputime"):
                    cputime = last_request_dict["cputime"]
                else:
                    print "ERROR: last request does not contain cputime field!: %s" % last_request_dict
                    continue

                try:
                    cpusec = int(cputime)
                except ValueError:
                    try:
                        cpusec = int(float(cputime))
                    except ValueError, verr:
                        print "ERROR: failed to parse cputime %s: %s" % (cputime, verr)

                # Include execution delay guesstimate for strict fill
                # LRMS resources

                try:
                    delay = int(last_request_dict["EXECUTION_DELAY"])
                except KeyError:
                    delay = 0
                except ValueError:
                    delay = 0

                time_remaining = (
                    last_request_dict["CREATED_TIME"]
                    + datetime.timedelta(seconds=cpusec)
                    + datetime.timedelta(seconds=delay)
                ) - datetime.datetime.now()
                days_rem = str(time_remaining.days)
                hours_rem = str(time_remaining.seconds / 3600)
                minutes_rem = str((time_remaining.seconds % 3600) / 60)
                seconds_rem = str((time_remaining.seconds % 60) % 60)

                if time_remaining.days < -7:
                    try:
                        print "removing: %s as we havent seen him for %s days." % (
                            mon_file_name,
                            abs(time_remaining).days,
                        )
                        os.remove(mon_file_name)
                    except Exception, err:
                        print "could not remove: '%s' Error: %s" % (mon_file_name, str(err))
                    pass
                else:
                    unique_res_name_and_exe_list = filename.split("monitor_last_request_", 1)
                    if cpusec == 0:
                        resource_status = "unavailable"
                    elif time_remaining.days < 0:

                        # time_remaining.days < 0 means that we have passed the specified time

                        time_rem_abs = abs(time_remaining)
                        if time_rem_abs.days == 0 and int(time_rem_abs.seconds) < int(slackperiod):
                            resource_status = "slack"
                            slack_count = slack_count + 1
                        else:
                            resource_status = "offline"
                            down_count = down_count + 1
                    else:
                        resource_status = "online"
                        up_count = up_count + 1

                    exes += "<tr>"
                    exes += "<td><img src=/images/status-icons/%s.png /></td>" % resource_status
                    public_id = unique_res_name_and_exe_list[1]
                    if last_request_dict["RESOURCE_CONFIG"].get("ANONYMOUS", True):
                        public_id = anon_resource_id(public_id)
                    public_name = last_request_dict["RESOURCE_CONFIG"].get("PUBLICNAME", "")
                    resource_parts = public_id.split("_", 2)
                    resource_name = "<a href='viewres.py?unique_resource_name=%s'>%s</a>" % (
                        resource_parts[0],
                        resource_parts[0],
                    )
                    if public_name:
                        resource_name += "<br />(alias %s)" % public_name
                    else:
                        resource_name += "<br />(no alias)"
                    resource_name += "<br />%s" % resource_parts[1]
                    exes += "<td>%s</td>" % resource_name

                    exes += "<td>%s<br />(%sd %sh %sm %ss ago)</td>" % (
                        time.asctime(last_request_dict["CREATED_TIME"].timetuple()),
                        days,
                        hours,
                        minutes,
                        seconds,
                    )
                    exes += "<td>" + vgrid_name + "</td>"
                    runtime_envs = last_request_dict["RESOURCE_CONFIG"]["RUNTIMEENVIRONMENT"]
                    re_list_text = ", ".join([i[0] for i in runtime_envs])
                    exes += '<td title="%s">' % re_list_text + str(len(runtime_envs)) + "</td>"
                    exes += (
                        "<td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["CPUTIME"])
                        + "</td><td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["NODECOUNT"])
                        + "</td><td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["CPUCOUNT"])
                        + "</td><td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["DISK"])
                        + "</td><td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["MEMORY"])
                        + "</td><td>"
                        + str(last_request_dict["RESOURCE_CONFIG"]["ARCHITECTURE"])
                        + "</td>"
                    )
                    exes += (
                        "<td>" + last_request_dict["STATUS"] + "</td><td>" + str(last_request_dict["CPUTIME"]) + "</td>"
                    )

                    exes += "<td class=status_%s>" % resource_status
                    if "unavailable" == resource_status:
                        exes += "-"
                    elif "slack" == resource_status:
                        exes += "Within slack period (%s < %s secs)" % (time_rem_abs.seconds, slackperiod)
                    elif "offline" == resource_status:
                        exes += "down?"
                    else:
                        exes += "%sd, %sh, %sm, %ss" % (days_rem, hours_rem, minutes_rem, seconds_rem)
                    exes += "</td>"

                    exes += "</tr>\n"
                    if last_request_dict["STATUS"] == "Job assigned":
                        job_assigned = job_assigned + 1
                        job_assigned_cpus = job_assigned_cpus + int(
                            last_request_dict["RESOURCE_CONFIG"]["NODECOUNT"]
                        ) * int(last_request_dict["RESOURCE_CONFIG"]["CPUCOUNT"])

                    total_number_of_exe_resources += 1
                    total_number_of_exe_cpus += int(last_request_dict["RESOURCE_CONFIG"]["NODECOUNT"]) * int(
                        last_request_dict["RESOURCE_CONFIG"]["CPUCOUNT"]
                    )
            elif filename.startswith("monitor_last_status_"):

                # store must be linked to this vgrid, not only parent vgrid:
                # inheritance only covers access, not automatic participation

                if current_dir != vgrid_name:
                    continue

                # read last resource action status file

                mon_file_name = os.path.join(abs_mon_dir, filename)
                print "found " + mon_file_name
                last_status_dict = unpickle(mon_file_name, logger)
                if not last_status_dict:
                    print "could not open and unpickle: " + mon_file_name
                    continue

                difference = datetime.datetime.now() - last_status_dict["CREATED_TIME"]
                days = str(difference.days)
                hours = str(difference.seconds / 3600)
                minutes = str((difference.seconds % 3600) / 60)
                seconds = str((difference.seconds % 60) % 60)

                if last_status_dict["STATUS"] == "stopped":
                    time_stopped = datetime.datetime.now() - last_status_dict["CREATED_TIME"]
                    if time_stopped.days > 7:
                        try:
                            print "removing: %s as we havent seen him for %s days." % (
                                mon_file_name,
                                abs(time_stopped).days,
                            )
                            os.remove(mon_file_name)
                        except Exception, err:
                            print "could not remove: '%s' Error: %s" % (mon_file_name, str(err))
                        continue
Exemple #59
0
def set_user_display_active(
    client_id,
    display_number,
    vnc_port,
    password,
    configuration,
    logger,
    ):
    """Register display_number as in use by client_id with the given vnc_port
    and password in the shared display registry."""

    (init_ret, filename) = \
        initialize_and_get_display_dict_filename(configuration, logger)
    if not init_ret:
        return (False, 'could not initialize')

    (dis_ret, dis_dict) = get_dict_from_display_number(display_number,
            configuration, logger)
    if not dis_ret:
        return (False, 'dict error, %s' % dis_dict)
    if dis_dict != -1:
        if dis_dict['client_id'] != client_id:

            # display occupied by another user!

            return (False, 'display %s already in use by another user!'
                    % display_number)

    # getting here means display is free or used by client_id

    dict = unpickle(filename, logger)
    if dict == False:
        return (False, 'could not unpickle %s' % filename)

    current_display = get_users_display_number(client_id,
            configuration, logger)
    if not current_display:

        # register display

        dict[display_number] = {'client_id': client_id,
                                'vnc_port': vnc_port,
                                'password': password}
        pickle_status = pickle(dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                     % (filename, dict[display_number]))
        logger.info('successfully registered that display %s is in use by %s in %s'
                     % (display_number, client_id, filename))
        return (True, '')

    if current_display != display_number and current_display != -1:

        # problems..

        return (False,
                'set_user_display_active met a conflict, can not set display %s when user already has %s registered'
                 % (display_number, current_display))
    else:

        # add display to dict

        dict[display_number] = {'client_id': client_id,
                                'vnc_port': vnc_port,
                                'password': password}
        pickle_status = pickle(dict, filename, logger)

        if not pickle_status:
            return (False, 'could not pickle %s when adding %s'
                     % (filename, dict[display_number]))

        logger.info('successfully registered that display %s is in use by %s in %s %s'
                     % (display_number, client_id, dict, filename))
        return (True, '')
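The pickled display registry manipulated above is just a dict keyed by display number, with each entry holding 'client_id', 'vnc_port' and 'password'. A read-only lookup over such a dict might look like this; the helper name is made up:

def lookup_display(display_dict, client_id):
    """Return (display_number, entry) registered for client_id, or (None, None)."""
    for (display_number, entry) in display_dict.items():
        if entry.get('client_id') == client_id:
            return (display_number, entry)
    return (None, None)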
Exemple #60
0
def load_schedule_cache(path, logger):
    """Load schedule cache from path"""

    return io.unpickle(path, logger)
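A short usage sketch for the wrapper above; the path and logger setup are examples, not MiG defaults:

import logging

logger = logging.getLogger('schedule-cache-demo')
schedule = load_schedule_cache('/tmp/schedule.cache', logger)
if not schedule:
    logger.warning('no usable schedule cache - starting from scratch')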