Beispiel #1
0
    def initialize_algo_from_model(algo_options, searchinfo, namespace):
        """Load a previously saved algo when the options name a model.

        When 'model_name' is absent, or the named model cannot be found,
        nothing is loaded and the options are handed back untouched. When a
        model is loaded, its saved options are checked against the requested
        ones and then take their place.

        Args:
            algo_options (dict): algo options
            searchinfo (dict): information required for search
            namespace (string): namespace, 'user' or 'app'
        Returns:
            algo (object/None): loaded algo or None
            algo_options (dict): the loaded model's options when an algo was
                loaded, otherwise the algo_options that were passed in
        Raises:
            RuntimeError: loading failed for a reason other than the model
                not being found
        """
        if 'model_name' not in algo_options:
            return None, algo_options

        try:
            model_algo_name, algo, model_options = models.base.load_model(
                algo_options['model_name'], searchinfo, namespace=namespace)
        except models.models_util.ModelNotFoundException:
            # A missing model is not an error here: there is nothing to load.
            return None, algo_options
        except Exception as e:
            cexc.log_traceback()
            failure = 'Failed to load model "%s". Exception: %s.' % (
                algo_options['model_name'], str(e))
            raise RuntimeError(failure)

        if algo is None:
            return None, algo_options

        FitPartialProcessor.catch_model_discrepancies(
            algo_options, model_options, model_algo_name)

        # Pre 2.2 models do not save algo_name in their model options
        # So we must re add them here to be compatible with 2.2+ versions
        model_options['algo_name'] = algo_options['algo_name']
        return algo, model_options
def copy_model(source_searchinfo, source_model_name, target_searchinfo,
               target_model_name):
    """
    Copy a model from the source namespace to the target namespace under a new name.

    The source model is first copied into the staging directory; the staged
    file is then moved to the target space (always with namespace='user').

    Args:
        source_searchinfo: used to get the namespace of the source model
        source_model_name: the name of the source model
        target_searchinfo: used to get the namespace of the target model
        target_model_name: the name of the target model

    Returns:
        (dict) the reply of the last lookup file POST request

    Raises:
        Exception: the staged model file is not readable
    """
    # stage a copy of the source model
    staged_path = copy_model_to_staging(source_model_name, source_searchinfo)
    destination_filename = model_name_to_filename(target_model_name)

    if not os.access(staged_path, os.R_OK):
        cexc.log_traceback()
        raise Exception(
            'The temp model file %s is missing or permission denied' %
            staged_path)

    # send the staged model to the target space
    return move_model_file_from_staging(destination_filename,
                                        target_searchinfo,
                                        namespace='user',
                                        model_filepath=staged_path)
    def _handle_clone_reply(replies):
        """
        merge the 'messages' part of all replies into the last reply.
        Args:
            replies (list) : the replies from all splunk REST requests, with ['content']['messages'] modified by _clone_experiment_model_callback()

        Returns:
            (dict) a modified version of mltk clone reply, trimming all attributes in `content` except 'messages'.
        """

        messages = []
        merged_reply = None  # set None to throw exception if replies is empty
        try:
            for reply in replies:
                messages.append(json.loads(reply['content'])['messages'][0])
                merged_reply = reply
                if not reply['success']:
                    break

            merged_reply['content'] = json.dumps({'messages': messages})
        except Exception as e:
            cexc.log_traceback()
            raise Exception(
                "Invalid JSON response from REST API, Please check mlspl.log for more details."
            )

        return merged_reply
Beispiel #4
0
    def load_model(model_name, searchinfo, namespace):
        """Load a saved model, turning any failure into a RuntimeError.

        Args:
            model_name (str): model name
            searchinfo (dict): information required for search
            namespace (string): namespace, 'user' or 'app'
        Returns:
            algo_name (str): algo name
            algo (model object): algo object
            model_options (dict): model options
        Raises:
            RuntimeError: the model does not exist or could not be loaded
        """
        try:
            algo_name, algo, model_options = models.base.load_model(
                model_name, searchinfo, namespace=namespace)
        except (OSError, IOError) as e:
            # ENOENT gets its own message; the traceback is not logged for
            # file-system errors.
            if e.errno == errno.ENOENT:
                failure = 'model "%s" does not exist.' % model_name
            else:
                failure = 'Failed to load model "%s": %s.' % (model_name,
                                                              str(e))
            raise RuntimeError(failure)
        except Exception as e:
            cexc.log_traceback()
            failure = 'Failed to load model "%s": %s.' % (model_name, str(e))
            raise RuntimeError(failure)
        return algo_name, algo, model_options
Beispiel #5
0
def get_model_from_btool_result(btool_dict, model_name, user, app, roles,
                                namespace):
    """Pick the lookup path of a model out of a btool result mapping.

    Resolution order:
      1. With namespace 'user': the user's own entry, if its path ends with
         the user-level lookups location for this app.
      2. The first role entry whose path ends with the app-level lookups
         location.
      3. Unless namespace is 'app': the greatest (by string comparison) of
         the remaining role paths.

    Note that each role's entry for ``model_name`` is popped from
    ``btool_dict`` while searching, so the mapping is modified.

    Args:
        btool_dict (dict): mapping of user/role name to {model name: path}
        model_name (str): name of the model to look up
        user (str): user name
        app (str): app name
        roles (iterable): role names to search
        namespace (str): 'user', 'app', or anything else for global lookup

    Returns:
        (str/None): the selected path, or None if nothing qualified

    Raises:
        Exception: any unexpected error while resolving (details are logged)
    """
    try:
        in_user_scope = (namespace == 'user' and user in btool_dict
                         and model_name in btool_dict[user])
        if in_user_scope:
            user_path = btool_dict[user][model_name]
            user_suffix = os.path.join('users', user, app, 'lookups',
                                       model_name_to_filename(model_name))
            # Here only models in the user namespace is checked, because there is a issue/bug with btool
            # if username is also a role name in Splunk (e.g. username=power and there is the "power" role),
            # btool might return objects that the user have no permission on but role does.
            if user_path.endswith(user_suffix):
                return user_path

        app_suffix = os.path.join('apps', app, 'lookups',
                                  model_name_to_filename(model_name))
        best_global = None
        for role in roles:
            try:
                candidate = btool_dict[role].pop(model_name)
                if candidate.endswith(app_suffix):
                    return candidate
                # If "app:" is not used, check global namespace
                if namespace != 'app' and (best_global is None
                                           or best_global < candidate):
                    best_global = candidate
            except KeyError:
                pass  # nothing for this role/model; move on to the next role
    except Exception:
        cexc.log_traceback()
        raise Exception("Please check mlspl.log for more details.")
    return best_global
Beispiel #6
0
    def fit(df, algo, algo_options):
        """Fit the algo on the input data.

        Some algorithms also predict inside their fit method and hand back a
        dataframe of predictions; others return nothing. The third element of
        the result tells the caller which of the two happened.

        Args:
            df (dataframe): dataframe to fit the algo
            algo (object): initialized/loaded algo object
            algo_options (dict): algo options

        Returns:
            algo (object): updated algo object
            df (dataframe):
                - the prediction, if algo.fit returned a dataframe
                - the input df otherwise
            has_applied (bool): True when the returned df is the prediction

        Raises:
            RuntimeError: algo.fit raised; the traceback is logged first
        """
        try:
            fit_result = algo.fit(df, options=algo_options)
        except Exception as e:
            cexc.log_traceback()
            raise RuntimeError('Error while fitting "%s" model: %s' % (algo_options['algo_name'], str(e)))

        if isinstance(fit_result, pd.DataFrame):
            return algo, fit_result, True

        return algo, df, False
    def initialize_processor(processor_name, process_options, searchinfo):
        """Import and initialize a processor.

        Processors are stored in ./bin/processors/
        The processors all inherit from the BaseProcessor class.

        The module ``processors.<processor_name>`` is imported and the class
        of the same name is instantiated with the options and searchinfo.

        Args:
            processor_name (str): processor name
            process_options (dict): process options
            searchinfo (dict): information required for search

        Returns:
            processor (object): initialized processor

        Raises:
            RuntimeError: the processor module or class cannot be imported,
                or the processor's constructor raised
        """
        try:
            processor_module = importlib.import_module(
                'processors.{}'.format(processor_name))
            processor_class = getattr(processor_module, processor_name)
        except (ImportError, AttributeError):
            # ImportError: the module is missing; AttributeError: the module
            # exists but does not define a class of the same name.
            logger.debug('Failed to import ML-SPL processor "%s"' %
                         processor_name)
            raise RuntimeError('Failed to import ML-SPL processor.')

        try:
            processor = processor_class(process_options, searchinfo)
        except Exception as e:
            cexc.log_traceback()
            logger.debug('Error while initializing processor "%s": %s' %
                         (processor_name, str(e)))
            raise RuntimeError(str(e))
        return processor
Beispiel #8
0
    def _fit(self, X):
        """Fit an ARIMA estimator on the time-series column of X.

        Checks that every feature variable is present, that X has fields and
        rows, and that the time-series column is numeric; then fits the
        estimator and attaches a ``datetime_information`` dict to it.

        Args:
            X (dataframe): input data. The time-series column is cast to
                float in place (unused fields are presumably dropped in place
                too, since the helper's return value is not used).

        Raises:
            ValueError: non-numeric time series, or the fit reported
                non-stationary / non-invertible coefficients or another
                ValueError
            RuntimeError: missing data in the time series, or any other
                failure while fitting
        """
        for variable in self.feature_variables:
            df_util.assert_field_present(X, variable)
        df_util.drop_unused_fields(X, self.feature_variables)
        df_util.assert_any_fields(X)
        df_util.assert_any_rows(X)

        if X[self.time_series].dtype == object:
            raise ValueError(
                '%s contains non-numeric data. ARIMA only accepts numeric data.'
                % self.time_series)
        X[self.time_series] = X[self.time_series].astype(float)

        try:
            self.estimator = _ARIMA(
                X[self.time_series].values,
                order=self.out_params['model_params']['order'],
                missing=self.out_params['model_params']['missing']).fit(
                    disp=False)
        except ValueError as e:
            # str(e) rather than e.message: the ``message`` attribute only
            # exists on Python 2 exceptions.
            error_text = str(e)
            if 'stationary' in error_text:
                raise ValueError(
                    "The computed initial AR coefficients are not "
                    "stationary. You should induce stationarity by choosing a different model order."
                )
            elif 'invertible' in error_text:
                raise ValueError(
                    "The computed initial MA coefficients are not invertible. "
                    "You should induce invertibility by choosing a different model order."
                )
            else:
                cexc.log_traceback()
                raise ValueError(e)
        except MissingDataError:
            raise RuntimeError('Empty or null values are not supported in %s. '
                               'If using timechart, try using a larger span.' %
                               self.time_series)
        except Exception as e:
            cexc.log_traceback()
            raise RuntimeError(e)

        # Saving the _time but not as a part of the ARIMA structure but as new attribute for ARIMA.
        if '_time' in self.feature_variables:
            freq = self._find_freq(X['_time'].values, self.freq_threshold)
            self.estimator.datetime_information = dict(
                ver=0,
                _time=X['_time'].values,
                freq=freq,
                # in seconds (unix epoch)
                first_timestamp=X['_time'].values[0],
                last_timestamp=X['_time'].values[-1],
                length=len(X))
        else:
            # NOTE(review): this branch uses the keys first_time/last_time
            # while the branch above uses first_timestamp/last_timestamp.
            # Left unchanged because readers of this dict are not visible
            # here - confirm which spelling they expect.
            self.estimator.datetime_information = dict(ver=0,
                                                       _time=None,
                                                       freq=None,
                                                       first_time=None,
                                                       last_time=None,
                                                       length=len(X))
def get_file_path_from_content(content):
    """Return the 'eai:data' value of the first entry in a parsed REST reply.

    Args:
        content (dict): parsed reply, read as content['entry'][0]['content']['eai:data']

    Returns:
        the value stored under 'eai:data'

    Raises:
        Exception: the expected structure is missing (details are logged)
    """
    try:
        return content['entry'][0]['content']['eai:data']
    except Exception:
        cexc.log_traceback()
        raise Exception("Invalid JSON response from REST API, Please check mlspl.log for more details.")
Beispiel #10
0
 def initialize_algo(algo_options, searchinfo):
     """Look up the algo class named in the options and instantiate it.

     Args:
         algo_options (dict): algo options; must contain 'algo_name'
         searchinfo (dict): information required for search

     Returns:
         the algo instance, constructed with algo_options

     Raises:
         KeyError: 'algo_name' is missing from algo_options
         RuntimeError: the class lookup or the constructor raised
     """
     algo_name = algo_options['algo_name']
     try:
         algo_cls = initialize_algo_class(algo_name, searchinfo)
         instance = algo_cls(algo_options)
     except Exception as e:
         cexc.log_traceback()
         failure = 'Error while initializing algorithm "%s": %s' % (
             algo_name, str(e))
         raise RuntimeError(failure)
     return instance
        def callback(model_name):
            """Copy the draft model of ``model_name`` onto ``model_name``.

            Source and target both use the ``searchinfo`` of the enclosing
            scope. A missing draft model is logged and re-raised as a
            SplunkRestProxyException carrying httplib.NOT_FOUND.
            """
            draft_model_name = get_experiment_draft_model_name(model_name)

            try:
                clone_reply = copy_model(searchinfo, draft_model_name,
                                         searchinfo, model_name)
            except ModelNotFoundException as e:
                cexc.log_traceback()
                logger.error(e)
                not_found_text = "%s: %s" % (str(e), draft_model_name)
                raise SplunkRestProxyException(not_found_text, logging.ERROR,
                                               httplib.NOT_FOUND)
            return clone_reply
 def _delete_models(request, url_parts):
     if len(url_parts) == 1:
         try:
             searchinfo = searchinfo_from_request(request)
             rest_proxy = rest_proxy_from_searchinfo(searchinfo)
             model_list = get_model_list_by_experiment(
                 rest_proxy, namespace='user', experiment_id=url_parts[0])
             for model_name in model_list:
                 url = rest_url_util.make_get_lookup_url(
                     rest_proxy, namespace='user', lookup_file=model_name)
                 reply = rest_proxy.make_rest_call('DELETE', url)
         except Exception as e:
             cexc.log_traceback()
             pass
Beispiel #13
0
    def setup_model(cls, process_options, searchinfo):
        """Load temp model, try to load real model, update options.

        Remove the tmp_dir in the process.

        The temp model is tried first and loaded without its model object. If
        that fails, the real model is loaded instead. 'tmp_dir' and
        'namespace' are popped from the process_options passed in.

        Args:
            process_options (dict): process_options
            searchinfo (dict): information required for search
        Returns:
            algo_name (str): algorithm name
            algo (object): algorithm object (None when the temp model was used)
            process_options (dict): loaded model options overridden by the
                given process options
            namespace (str/None): namespace popped from process_options
        Raises:
            KeyError: 'tmp_dir' is missing from process_options
            RuntimeError: the real model does not exist or failed to load
        """
        tmp_dir = process_options.pop('tmp_dir')

        searchinfo = search_util.add_distributed_search_info(process_options, searchinfo)

        namespace = process_options.pop('namespace', None)
        try:
            algo_name, _, model_options = models.base.load_model(
                process_options['model_name'],
                searchinfo,
                namespace=namespace,
                model_dir=tmp_dir,
                skip_model_obj=True,
                tmp=True
            )
            algo = None
            logger.debug('Using tmp model to set required_fields.')
        except Exception:
            # Deliberately not a bare ``except:`` so that KeyboardInterrupt
            # and SystemExit propagate instead of triggering the fallback.
            # Try to load real model.
            try:
                algo_name, algo, model_options = models.base.load_model(
                    process_options['model_name'],
                    searchinfo,
                    namespace=namespace)
            except (OSError, IOError) as e:
                if e.errno == errno.ENOENT:
                    raise RuntimeError('model "%s" does not exist.' % process_options['model_name'])
                raise RuntimeError('Failed to load model "%s": %s.' % (
                    process_options['model_name'], str(e)))
            except Exception as e:
                cexc.log_traceback()
                raise RuntimeError('Failed to load model "%s": %s.' % (
                    process_options['model_name'], str(e)))

        model_options.update(process_options)  # process options override loaded model options
        process_options = model_options
        return algo_name, algo, process_options, namespace
Beispiel #14
0
    def save_temp_model(algo_options, tmp_dir):
        """Save temp model for follow-up apply.

        Does nothing when the options carry no 'model_name'. The temp model
        is saved without an algo object (None is passed in its place).

        Args:
            algo_options (dict): algo options
            tmp_dir (str): temp directory to save model to

        Raises:
            RuntimeError: saving failed; the traceback is logged first
        """
        if 'model_name' not in algo_options:
            return

        try:
            models.base.save_model(algo_options['model_name'],
                                   None,
                                   algo_options['algo_name'],
                                   algo_options,
                                   model_dir=tmp_dir,
                                   tmp=True)
        except Exception as e:
            cexc.log_traceback()
            raise RuntimeError(
                'Error while saving temporary model "%s": %s' % (algo_options['model_name'], e))
Beispiel #15
0
 def save_model(self):
     """Attempt to save the model, delete the temporary model.

     Does nothing when the algo options carry no 'model_name'. A failure to
     save is fatal; a failure to delete the temporary model is only logged.

     Raises:
         RuntimeError: the model could not be saved
     """
     if 'model_name' not in self.algo_options:
         return

     try:
         models.base.save_model(self.algo_options['model_name'], self.algo,
                                self.algo_options['algo_name'], self.algo_options,
                                max_size=self.resource_limits['max_model_size_mb'],
                                searchinfo=self.searchinfo, namespace=self.namespace)
     except Exception as e:
         cexc.log_traceback()
         raise RuntimeError('Error while saving model "%s": %s' % (self.algo_options['model_name'], e))

     try:
         models.base.delete_model(self.algo_options['model_name'],
                                  model_dir=self.tmp_dir, tmp=True)
     except Exception as e:
         cexc.log_traceback()
         # warning() instead of the deprecated warn() alias
         logger.warning('Exception while deleting tmp model "%s": %s', self.algo_options['model_name'], e)
def load_scoring_function(module_name, func_name):
    """Fetch a scoring function by name from the given module.

    Args:
        module_name (str): name of the module to load (eg. sklearn.metrics)
        func_name (str): name of the scoring function to load

    Returns:
        scoring (function): scoring function loaded from module

    Raises:
        RuntimeError: the module cannot be imported or lacks func_name
    """
    try:
        return getattr(importlib.import_module(module_name), func_name)
    except (ImportError, AttributeError):
        cexc.log_traceback()
        raise RuntimeError(
            'Scoring method {} is not available'.format(func_name))
    def score(df, score_method, scoring_options):
        """Run the scoring method on the input data.

        Args:
            df (dataframe): input data
            score_method (object): initialized score_method object
            scoring_options (dict): scoring options

        Returns:
            score_df (dataframe): output dataframe

        Raises:
            RuntimeError: score_method.score raised; the traceback is logged
        """
        try:
            return score_method.score(df, scoring_options)
        except Exception as e:
            cexc.log_traceback()
            failure = 'Error while scoring "{}": {}'.format(
                scoring_options['scoring_name'], str(e))
            raise RuntimeError(failure)
Beispiel #18
0
    def fit(algo, df, options):
        """Run one partial-fit step of the algo on the given data.

        Args:
            algo (object): algo object
            df (dataframe): dataframe to fit on
            options (dict): process options

        Returns:
            algo (object): updated algorithm

        Raises:
            RuntimeError: the algo does not support partial fit, or the
                partial fit itself failed
        """
        try:
            algo.partial_fit(df, options=options)
        except MLSPLNotImplementedError:
            unsupported = 'Algorithm "%s" does not support partial fit' % options['algo_name']
            raise RuntimeError(unsupported)
        except Exception as e:
            cexc.log_traceback()
            failure = 'Error while fitting "%s" model: %s' % (options['algo_name'], str(e))
            raise RuntimeError(failure)

        return algo
    def setup_score_method(self, scoring_options, searchinfo):
        """Resolve the scoring class for the requested scoring and build it.

        Args:
            scoring_options (dict): scoring options; must contain 'scoring_name'
            searchinfo (dict): information required for search

        Returns:
            score_method (object): scoring class instantiated with scoring_options
            scoring_module_name (str): scoring module name from scorings.conf

        Raises:
            KeyError: 'scoring_name' is missing from scoring_options
            RuntimeError: resolving or instantiating the scoring class failed
        """
        scoring_name = scoring_options['scoring_name']
        try:
            resolved = self.load_class_and_module_name(scoring_name, searchinfo)
            scoring_class, scoring_module_name = resolved
            score_method = scoring_class(scoring_options)
        except Exception as e:
            cexc.log_traceback()
            failure = 'Error while initializing scoring method "{}": {}'.format(
                scoring_name, str(e))
            raise RuntimeError(failure)
        return score_method, scoring_module_name
Beispiel #20
0
def save_experiment(experiment,
                    update,
                    searchinfo,
                    experiment_dir=experiment_staging_dir,
                    namespace='user'):
    """Write an experiment to a staging file and move it to its lookup location.

    The experiment is serialized as JSON into a two-row CSV (a header row
    'experiment' followed by the JSON text) under a unique temporary name in
    ``experiment_dir``. The staged file is then moved under the file name
    derived from the experiment id and type.

    Args:
        experiment (dict): experiment definition; 'id' and 'type' are read
        update: passed through to move_experiment_file_from_staging
        searchinfo (dict): information required for search
        experiment_dir (str): staging directory, created if missing
        namespace (str): namespace passed through to the move

    Returns:
        (dict) reply of the move, with reply['entry'][0] replaced by the
        result of get_experiment_from_lookup

    Raises:
        Exception: the staging directory could not be created
    """
    try:
        os.makedirs(experiment_dir)
    except OSError as e:
        # An already existing directory is fine; anything else is fatal.
        dir_already_there = (e.errno == errno.EEXIST
                             and os.path.isdir(experiment_dir))
        if not dir_already_there:
            cexc.log_traceback()
            raise Exception("Error creating experiment: %s, %s" %
                            (experiment["id"], e))

    staging_name = '_' + str(uuid.uuid1()).replace('-', '_')
    # raises if invalid
    type_long = experiment['type'].lower()
    type_short = EXPERIMENT_TYPES_MAP[type_long]
    staging_path = file_name_to_path(
        experiment_name_to_filename(staging_name, type_short), experiment_dir)
    logger.debug('Saving experiment: %s' % staging_path)

    with open(staging_path, mode='w') as staging_file:
        writer = csv.writer(staging_file)

        # TODO: Version attribute
        writer.writerow(['experiment'])
        writer.writerow([json.dumps(experiment)])

    final_filename = experiment_name_to_filename(experiment["id"], type_short)
    # The handle is closed by now, but its name attribute is still readable.
    reply = move_experiment_file_from_staging(final_filename, update,
                                              searchinfo, namespace,
                                              staging_file.name)

    # decorate the new lookup file with experiment-specific info
    reply['entry'][0] = get_experiment_from_lookup(reply['entry'][0])

    return reply
Beispiel #21
0
    def apply(df, algo, process_options):
        """Run the algo's apply on the input data.

        A garbage collection pass is triggered right after a successful
        apply.

        Args:
            df (dataframe): input data
            algo (object): initialized algo object
            process_options (dict): process options

        Returns:
            prediction_df (dataframe): output dataframe

        Raises:
            RuntimeError: algo.apply raised; a warning message naming the
                model is emitted first
        """
        try:
            result = algo.apply(df, options=process_options)
            gc.collect()
        except Exception as e:
            cexc.log_traceback()
            warning_text = 'Error while applying model "%s": %s' % (process_options['model_name'], str(e))
            cexc.messages.warn(warning_text)
            raise RuntimeError(e)

        return result
Beispiel #22
0
    def get_output(self):
        """Override get_output from BaseProcessor.

        If no prediction has been made yet, apply the algo to the stored
        dataframe first. An apply that produced nothing is reported with a
        warning and replaced by an empty dataframe.

        Returns:
            (dataframe): output dataframe

        Raises:
            RuntimeError: the algo's apply raised
        """
        needs_apply = not self.has_applied
        if needs_apply:
            try:
                self.df = self.algo.apply(self.df, options=self.algo_options)
            except Exception as e:
                cexc.log_traceback()
                logger.debug('Error during apply phase of fit command. Check apply method of algorithm.')
                failure = 'Error while fitting "%s" model: %s' % (self.algo_options['algo_name'], str(e))
                raise RuntimeError(failure)

        if self.df is not None:
            return self.df

        messages.warn('Apply method did not return any results.')
        self.df = pd.DataFrame()
        return self.df
def parse_reply_for_rest(reply):
    """
    Simplified version of lookups_parse_reply - instead of throwing custom Exceptions for the non-success case, it only
    throws one exception which is a wrapper of the splunk reply.

    Args:
        reply (dict): REST reply; 'success' and 'content' are read

    Returns:
        the JSON-decoded reply['content']

    Raises:
        SplunkRestException: reply['success'] is falsy
        Exception: the reply is malformed or its content is not valid JSON
    """
    try:
        if reply['success']:
            return json.loads(reply['content'])
        raise SplunkRestException(reply)
    except SplunkRestException:
        # log, then hand the caller a fresh wrapper around the same reply
        cexc.log_traceback()
        raise SplunkRestException(reply)
    except Exception:
        cexc.log_traceback()
        raise Exception(
            "Invalid JSON response from REST API, Please check mlspl.log for more details."
        )
    def _add_model_name_to_reply(raw_reply, model_name):
        """
        Tag the reply of a Splunk lookup-table-file REST request with a model name.

        Works on a deep copy; the raw reply is left untouched.
            1. if the reply has at least one message, the custom attribute is added to the first message.
            2. if the reply has no messages, a message with type='INFO', empty text and the custom attribute
            is appended.

        Args:
            raw_reply (dict) : a dict of raw reply from Splunk lookup-table-file request
            model_name: the model name which needs to be inserted.

        Returns:
            (dict) modified copy of the reply.

        Raises:
            Exception: the reply content could not be parsed or lacks 'messages'.
        """
        tagged_reply = copy.deepcopy(raw_reply)
        try:
            payload = json.loads(raw_reply['content'])
            message_list = payload['messages']

            if message_list:
                message_list[0][MODEL_NAME_ATTR] = model_name
            else:
                message_list.append({
                    'type': "INFO",
                    'text': '',
                    MODEL_NAME_ATTR: model_name,
                })

            tagged_reply['content'] = json.dumps(payload)
        except Exception:
            cexc.log_traceback()
            raise Exception(
                "Invalid JSON response from REST API, Please check mlspl.log for more details."
            )

        return tagged_reply
    def _handle_all_experiment_models(reply, callback_handler):
        """
        pass the callback_handler to each model for all search stages of an experiment, exit if handler returns failure.
        Args:
            reply (dict) : the reply object of an experiment GET request.
            callback_handler (func) : a callback handler for each model, it should return the reply of a REST request.

        Returns:
            (list): a list of replies from each handlers
        """

        try:
            content = json.loads(reply['content'])
            entries = content['entry']

            # a cache that stores the reply from the callback of each model
            reply_list = []

            for entry in entries:
                ss_json = entry['content']['searchStages']
                search_stages = json.loads(ss_json)
                for search_stage in search_stages:
                    model_name = search_stage.get('modelName')
                    if model_name is not None:
                        reply = callback_handler(model_name)
                        reply_list.append(reply)
                        # if any of the reply is not successful, stop the process and return the current reply list
                        if not reply.get('success'):
                            return reply_list

            return reply_list

        except Exception:
            cexc.log_traceback()
            raise Exception(
                "Invalid JSON response from REST API, Please check mlspl.log for more details."
            )
Beispiel #26
0
    def delete_model(process_options, searchinfo, namespace):
        """Delete the model named in the process options and report it.

        Args:
            process_options (dict): process options; 'model_name' is read
            searchinfo (dict): information required for search
            namespace (string): namespace, 'user' or 'app'

        Raises:
            RuntimeError: the model does not exist or could not be deleted
        """
        try:
            deletemodels.delete_model(process_options['model_name'],
                                      searchinfo,
                                      namespace=namespace)
        except (OSError, IOError) as e:
            # ENOENT gets its own message; the traceback is not logged for
            # file-system errors.
            if e.errno == errno.ENOENT:
                failure = 'model "%s" does not exist.' % process_options['model_name']
            else:
                failure = 'Failed to delete model "%s": %s.' % (
                    process_options['model_name'], str(e))
            raise RuntimeError(failure)
        except Exception as e:
            cexc.log_traceback()
            failure = 'Failed to delete model "%s": %s.' % (
                process_options['model_name'], str(e))
            raise RuntimeError(failure)

        messages.info('Deleted model "%s"' % process_options['model_name'])
    def setup_model(cls, process_options, searchinfo):
        """Load temp model, try to load real model, update options.

        Remove the tmp_dir in the process.

        'tmp_dir', 'namespace' and 'mlspl_conf' are popped from the
        process_options passed in. In a parsetmp search no model is loaded.
        Otherwise the temp model is tried first (without its model object)
        and the real model is loaded if that fails.

        Args:
            process_options (dict): process_options
            searchinfo (dict): information required for search
        Returns:
            algo_name (str): algorithm name (None in a parsetmp search)
            algo (object): algorithm object (None when the temp model was
                used or in a parsetmp search)
            process_options (dict): updated process options
            namespace (str): namespace of the model (None in a parsetmp search)
        Raises:
            KeyError: 'tmp_dir' or 'mlspl_conf' is missing from process_options
            RuntimeError: the real model does not exist or failed to load
        """
        tmp_dir = process_options.pop('tmp_dir')

        searchinfo = search_util.add_distributed_search_info(
            process_options, searchinfo)

        namespace = process_options.pop('namespace', None)

        mlspl_conf = process_options.pop('mlspl_conf')

        # For MLA-1989 we cannot properly load a model in parsetmp search
        if is_parsetmp(searchinfo):
            process_options['mlspl_limits'] = {}
            process_options['feature_variables'] = ['*']
            return None, None, process_options, None

        try:
            algo_name, _, model_options = models.base.load_model(
                process_options['model_name'],
                searchinfo,
                namespace=namespace,
                model_dir=tmp_dir,
                skip_model_obj=True,
                tmp=True)
            algo = None
            logger.debug('Using tmp model to set required_fields.')
        except Exception:
            # Deliberately not a bare ``except:`` so that KeyboardInterrupt
            # and SystemExit propagate instead of triggering the fallback.
            # Try to load real model.
            try:
                algo_name, algo, model_options = models.base.load_model(
                    process_options['model_name'],
                    searchinfo,
                    namespace=namespace)
            except (OSError, IOError) as e:
                if e.errno == errno.ENOENT:
                    raise RuntimeError('model "%s" does not exist.' %
                                       process_options['model_name'])
                raise RuntimeError('Failed to load model "%s": %s.' %
                                   (process_options['model_name'], str(e)))
            except Exception as e:
                cexc.log_traceback()
                raise RuntimeError('Failed to load model "%s": %s.' %
                                   (process_options['model_name'], str(e)))

        model_options.update(
            process_options)  # process options override loaded model options
        process_options = model_options
        process_options['mlspl_limits'] = mlspl_conf.get_stanza(algo_name)
        return algo_name, algo, process_options, namespace
def add_distributed_search_info(process_options, searchinfo):
    """
    Add additional information required for distributed search to searchinfo given.

    Sets searchinfo['bundle_path'] and searchinfo['is_remote'] from the
    info.csv in the dispatch directory and, for remote searches, also sets
    searchinfo['roles'] from the args.txt of the root dispatch directory.

    Args:
        process_options (dict): the process options to pass to the processor;
            when None, searchinfo is used in its place
        searchinfo (dict): information required for search

    Returns:
        searchinfo (dict): the original input searchinfo dict updated with information for distributed search

    Raises:
        RuntimeError: any failure while collecting the information
    """

    # For MLA-1989, in parsetmp search, we do not add anything
    if is_parsetmp(searchinfo):
        return searchinfo

    # In the case we need this before process_options exists
    if process_options is None:
        process_options = searchinfo

    try:
        dispatch_dir = process_options.get('dispatch_dir')
        info = info_csv_to_dict(os.path.join(dispatch_dir, 'info.csv'))

        dispatch_base_folder = os.path.dirname(dispatch_dir)

        def get_root_from_info(dispatch_dir):
            """Recursively get _root_sid from info.csv until we find args.txt.

             If _root_sid is present without a value, it should be '' (empty string)
             if it is not present, we will default to None, which are both falsy

            Args:
                dispatch_dir (str): the dispatch directory path or the previous _root_sid value
            Returns
                dispatch_dir (str): the dispatch directory path where we can find args.txt
            """

            # A bare _root_sid is resolved relative to the dispatch base folder.
            if not dispatch_dir.startswith(dispatch_base_folder):
                dispatch_dir = os.sep.join(
                    [dispatch_base_folder, dispatch_dir])

            try:
                if 'args.txt' in os.listdir(dispatch_dir):
                    return dispatch_dir

                some_info = info_csv_to_dict(
                    os.path.join(dispatch_dir, 'info.csv'))
                if some_info.get('_root_sid'):
                    return get_root_from_info(some_info['_root_sid'])
            except IOError:
                # Fall back to the directory reached so far.
                pass
            return dispatch_dir

        dispatch_dir = get_root_from_info(dispatch_dir)

        searchinfo['bundle_path'] = get_bundle_path(info)
        searchinfo['is_remote'] = is_remote_search(info)

        if searchinfo['is_remote']:
            searchinfo['roles'] = args_util.parse_roles(
                os.path.join(dispatch_dir, 'args.txt'))

    except Exception as e:
        logger.debug(e)
        cexc.log_traceback()
        # .get(): process_options may be searchinfo itself (see above) and
        # then need not contain 'model_name'; indexing would replace the
        # intended RuntimeError with a KeyError.
        raise RuntimeError('Failed to load model "%s": %s.' %
                           (process_options.get('model_name'), str(e)))

    return searchinfo