Example 1
    def add_info(self, exp_name):
        """Add infomations from the experiment name

        Args:
            exp_name(str): The name of the experiment.

        Returns:
            None
        """
        # Read faulty or not
        if config.get('default', 'nonfaulty_pattern') in exp_name:
            faulty = False
        else:
            faulty = True
        self.exp_info['faulty'] = faulty

        # Fault type
        name_sections = exp_name.split('-')
        fault_infos = name_sections[2].split('_')
        if faulty:
            if 'mem' in name_sections[2]:
                self.exp_info['fault_type'] = 'memoryleak'
            else:
                self.exp_info['fault_type'] = fault_infos[0]
            self.exp_info['faulty_resource'] = fault_infos[1]
        else:
            self.exp_info['fault_type'] = 'failurefree'
            self.exp_info['faulty_resource'] = 'none'
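
A minimal standalone sketch of how the parsing above decomposes a hypothetical experiment name (the dash-separated naming scheme and the 'cpuhog_cpu' token are illustrative assumptions, not the project's real pattern; whether a run is faulty is decided by the 'nonfaulty_pattern' config option):

# Hypothetical name; the real non-faulty pattern comes from config 'default'/'nonfaulty_pattern'.
exp_name = 'run01-node3-cpuhog_cpu'
name_sections = exp_name.split('-')        # ['run01', 'node3', 'cpuhog_cpu']
fault_infos = name_sections[2].split('_')  # ['cpuhog', 'cpu']
print(fault_infos[0], fault_infos[1])      # fault type 'cpuhog' on resource 'cpu'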
Example 2
def extractAnomaly(exp_name, src_dir):
    """Extract the anomaly from the SCAPI log file.

    Some information defined in the config file will be used to assist.
    '[preprocess]/<log_file_name>' defines the name of the SCAPI log file.
    '[preprocess]/<anomaly_string>' defines the pattern to recognize a line
    that contains the anomaly information; '[preprocess]/<fault_start>' defines
    the start time of the experiment so the method knows when to start the
    extraction.

    Args:
        exp_name(str): The name of the experiment.
        src_dir(str): The name of the folder where the experiment is put.

    Returns:
        A dict mapping each timestamp to the set of anomalies (KPI tags)
        observed at that timestamp.
    """
    log_file_name = config.get('preprocess', 'log_file_name')
    anomaly_string = config.get('preprocess', 'anomaly_string')
    fault_start = config.get('preprocess', 'fault_start')
    anmlist = {}

    log_file = os.path.join(src_dir, exp_name, log_file_name)

    with open(log_file) as anomalies_file:
        lines = [x for x in anomalies_file if anomaly_string in x]

    ts_start = int(time.strftime('%s', time.strptime(fault_start, "%y%m%d")))

    for line in lines:
        line = line.split(',')

        gcd = int(line[4])
        kpi = kpi_info.get_kpi_by_id(gcd)

        # subtract 3600 due to GMT time
        # TODO: make this more general
        reflex_time = int(line[2]) // 1000 - ts_start - 3600
        if reflex_time not in anmlist:
            anmlist[reflex_time] = []
        anmlist[reflex_time].append(kpi.tag)

    for ts, anmls in anmlist.items():
        anmlist[ts] = set(anmls)

    return anmlist
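
As a rough illustration of the parsing inside the loop, assuming a comma-separated SCAPI anomaly line in which field 2 is an epoch-millisecond timestamp and field 4 is the numeric KPI id (this layout is inferred from the indexing above, not from SCAPI documentation):

# Hypothetical anomaly log line; only the field positions matter here.
line = 'ANOMALY,host1,1520000123000,node3,42'.split(',')
epoch_ms = int(line[2])            # timestamp in milliseconds
kpi_id = int(line[4])              # numeric KPI identifier
print(epoch_ms // 1000, kpi_id)    # -> 1520000123 42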
Example 3
    def filter_(cls, exp):
        """Implementation of filter_. See class doc for more information.

        Args:
            exp(obj): An util.runtime.Observation instance.

        Returns:
            True if the experiment can be kept, False if the experiment should
            be ruled out.
        """
        rsc_filter = config.get('exp_filter', 'rsc_filter')
        if rsc_filter.lower() == 'all':
            return True
        if rsc_filter.lower() == exp.exp_info['faulty_resource'].lower():
            return True
        return False
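
A self-contained sketch of the same decision, feeding an illustrative rsc_filter value through Python's configparser instead of the project's config object (the INI text, the 'cpu' value and the faulty_resource value are assumptions):

from configparser import ConfigParser

config = ConfigParser()
config.read_string('[exp_filter]\nrsc_filter = cpu\n')
rsc_filter = config.get('exp_filter', 'rsc_filter')
faulty_resource = 'CPU'
# 'all' keeps every experiment; otherwise the resource must match case-insensitively.
keep = rsc_filter.lower() == 'all' or rsc_filter.lower() == faulty_resource.lower()
print(keep)  # -> True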
Example 4
    def filter_(cls, exp):
        """Implementation of filter_. See class doc for more information.

        Args:
            exp(obj): An util.runtime.Observation instance.

        Returns:
            True if the experiment can be kept, False if the experiment should
            be ruled out.
        """
        name_filter = [
            s.strip()
            for s in config.get('exp_filter', 'name_filter').split(',')
        ]

        if 'all' in name_filter:
            return True
        for name in name_filter:
            if name.lower() not in exp.exp_info['full_name'].lower():
                return False
        return True
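
A small standalone sketch of the name_filter matching above; the comma-separated filter string and the experiment name are made up for illustration:

name_filter = [s.strip() for s in 'cpuhog, node3'.split(',')]
full_name = '0000000060-node3-cpuhog_cpu'
# Keep the experiment only if every filter token occurs in its full name.
keep = 'all' in name_filter or all(n.lower() in full_name.lower() for n in name_filter)
print(keep)  # -> True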
Example 5
def preprocess():
    """The main API of the preprocess component.

    It takes the SCAPI log files that contain the anomalies and transforms
    them into the format that users and the predictor/localizer can read.
    After the transformation, it splits the anomalies at different timestamps
    into separate txt files, with each filename corresponding to the
    timestamp.

    Args:
        None

    Returns:
        None
    """
    main_folder = localizer_config.get_folder('src')
    localizer_config.reset_path(main_folder)
    targets = config.get('preprocess', 'targets').split(',')
    for target in targets:
        localizer_log.msg("Preprocessing {tar}...".format(tar=target))
        process_folder(target)
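
A hedged sketch of the '[preprocess]/targets' option the loop above consumes, read with Python's configparser rather than the project's config object (the target names are invented):

from configparser import ConfigParser

config = ConfigParser()
config.read_string('[preprocess]\ntargets = exp-a,exp-b\n')
for target in config.get('preprocess', 'targets').split(','):
    print('Preprocessing {tar}...'.format(tar=target))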
Example 6
def filter_(exp_map):
    """Take the experiments and apply some filters (defined in the config file,
    see description of the config file in README for more information)

    Args:
        exp_map(dict): A dictionary whose keys are integer experiment ids and
            whose values are util.runtime.Observation objects.

    Returns:
        A new dictionary containing only the experiments that pass the filter.
    """
    filter_name = config.get('exp_filter', 'filter')
    filter_klass = localizer_config.get_plugin('exp_filter',
                                               filter_name)
    new_map = {}
    for exp_id, exp in exp_map.items():
        if filter_klass.filter_(exp):
            new_map[exp_id] = exp

    return new_map
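
A standalone sketch of the plugin contract this function relies on: any class exposing filter_(exp) can be returned by localizer_config.get_plugin. The KeepAll class and the fake experiment values below are hypothetical:

class KeepAll:
    @classmethod
    def filter_(cls, exp):
        return True

exp_map = {0: 'exp-a', 1: 'exp-b'}
new_map = {exp_id: exp for exp_id, exp in exp_map.items() if KeepAll.filter_(exp)}
print(new_map)  # -> {0: 'exp-a', 1: 'exp-b'}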
Example 7
def run(mode, model_cache_file_name, evaluation_is_on):
    """
    :param mode: train, predict
    :param model_cache_name: name to save the trained model (or to load the saved one). Example: LMT
    :return: N/A
    """

    if config.has_option('default', 'max_heapsize'):
        jvm.start(config.get('default', 'max_heapsize'))
    else:
        localizer_log.msg("default->max_heapsize record does not exist")
        jvm.start()

    # Create the folder (dst_folder) to put:
    #   1) Training data set in arff format. Example: training.arff
    #   2) Model evaluation summary. Example: predictions.txt
    #   3) Folders for each target dataset (the dataset on which classifications are to be done) (example: 0000000060-10.40.7.172-PacL@Rnd_0_Rnd):
    #       3.1) Target dataset in arff format. Example: target.arff
    #       3.2) Classification results. Example: LMT.txt

    dst_folder = localizer_config.get_folder('dst')
    localizer_config.reset_path(dst_folder)

    localizer_log.msg("Initialising KPIs...")
    kpi_info.init(localizer_config.get_meta_path('kpi_indices'))
    localizer_log.msg("KPIs initialised")

    # Run the preprocess step on the original SCAPI log files if the component is enabled
    if localizer_config.component_enabled('preprocess'):
        preprocess.preprocess()

    # Add all classes to the all_classes global variable. Used in @attribute class {..} in training and target arffs.
    runtime.generate_classes_all()

    if mode == "train":

        # Reading training data from anomalies/training-data
        localizer_log.msg("Reading training data: Started.")
        training_dir = localizer_config.get_src_path('training')  # anomalies/training-data
        runtime.add_all(training_dir)
        localizer_log.msg("Reading training data: Completed.")

        if localizer_config.component_enabled('exp_filter'):
            experiments = exp_filter_manager.filter_(runtime.all_exps)
            localizer_log.msg("Exp. filter applied.")
        else:
            experiments = runtime.all_exps
            localizer_log.msg("No exp. filter applied.")

        # Generate training data set in arff format
        localizer_log.msg("Start generating the training.arff file (data for training).")
        training_dataset_arff_path = localizer_config.get_dst_path('training.arff')  # Example: data/classifications/training.arff
        arff_gen.gen_file(experiments, training_dataset_arff_path, "training", True)
        localizer_log.msg("The training.arff generated.")

        # Train
        path_to_save_training_summary = localizer_config.get_dst_path('predictions.txt')  # Example: data/classifications/predictions.txt
        weka_predict.train(training_dataset_arff_path, model_cache_file_name, evaluation_is_on, path_to_save_training_summary)

    if mode == "predict":

        # Reading target data from anomalies/test-data/
        localizer_log.msg("Reading data for classifications: Started.")
        target_dir = localizer_config.get_src_path('target')
        runtime.add_target(target_dir)
        localizer_log.msg("Reading data for classifications: Completed.")

        # Load cached model
        localizer_log.msg("Load model " + model_cache_file_name)
        weka_predict.load_model(model_cache_file_name)

        # Predict
        for exp_id, exp in runtime.targets_exps.items():
            exp_dst_path = localizer_config.get_dst_path(exp.exp_info['full_name'])
            localizer_config.reset_path(exp_dst_path)

            # Generate the target data set for predictions
            localizer_log.msg("Start generating the target.arff file (data for training).")
            exp_arff_path = os.path.join(exp_dst_path, 'target.arff')
            localizer_log.msg("target.arff file path: " + exp_arff_path)
            arff_gen.gen_file({exp_id: exp}, exp_arff_path, "test", fromzero=True)
            localizer_log.msg("The " + exp_arff_path + " generated.")

            # Make predictions
            localizer_log.msg("Start prediction.")
            weka_predict.predict(exp, exp_arff_path, exp_dst_path)
            localizer_log.msg("Prediction completed.")

    jvm.stop()
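
A hedged usage sketch: train first, then classify the target experiments with the cached model. The cache name 'LMT' follows the docstring example, and the boolean is the evaluation_is_on flag passed through to weka_predict.train above:

run('train', 'LMT', True)     # builds training.arff, trains and caches the model
run('predict', 'LMT', False)  # loads the cached model and classifies each target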
Example 8
def gen_file(exps, arff_path, data_set_type, fromzero=False):
    """Generates the WEKA arff file.

    The anomalies are aggregated over a fixed-length sliding window. For
    instance, if this length is set to 3, the aggregated anomalies will be
    [(anomalies at T1), (anomalies at T1 to T2), (anomalies at T1 to T3),
    (anomalies at T2 to T4), ... (anomalies at TN-2 to TN)]. The length is
    defined in '[predictor]/<sliding_window>' in the config file.

    Args:
        exps(dict): A dictionary which maps the id of a target experiment to
            its util.runtime.Observation instance.
        arff_path(str): A string that represents the path of the arff file.
        data_set_type(str): Either "training" or "test". For "test" data, the
            rows before the fault injection minute are relabelled as
            failurefree.
        fromzero(boolean): If set to True, the end of the window starts
            from 0, else it starts from the window size.

    Returns:
        None
    """

    lines = []
    lines.append('@relation anomalies')

    # attach attributes
    for kpi in kpi_info.kpi_list:
        lines.append("@attribute {kpi_name} {{TRUE, FALSE}}".format(
            kpi_name='_'.join(eval(kpi.tag))))

    # attach fault types
    exptag_name = config.get('exp_tag', 'tag')
    exptag_klass = localizer_config.get_plugin('exp_tag', exptag_name)
    import util.runtime as runtime
    lines.append('@attribute class {{{types}}}'.format(
        types=','.join(runtime.all_classes)))

    # Rahim added these lines
    fault_injection_minutes = [92, 110, 67, 32, 55, 50, 57, 34, 43, 56]

    lines.append('')

    # attach instances
    lines.append('@data')
    sliding_window = config.getint('predictor', 'sliding_window')
    for exp_id, exp in exps.items():

        data = exp.exp_data
        tag = exptag_klass.tag(exp)

        # Rahim added these lines
        sets_fault_string = str(tag).split("_")[0]
        if data_set_type == "test":

            if sets_fault_string == "failurefree":
                fault_injection_minute = 999999
            else:
                sets_exp_code = int(faults.index(sets_fault_string))
                fault_injection_minute = fault_injection_minutes[sets_exp_code]
        else:
            fault_injection_minute = 0

        print("exp_id:", exp_id, ". tag:", tag, ". data len:", len(data),
              "fault_injection_minute:", fault_injection_minute)
        # input("NEXT>>")

        for current, d in enumerate(data):
            if not fromzero and current + 1 < sliding_window:
                continue

            start = max(0, current + 1 - sliding_window)
            anomalies = reduce(lambda s1, s2: s1.union(s2),
                               [set(x) for x in data[start:current]], set())
            # create boolean list
            booleans = ["FALSE"] * len(kpi_info.kpi_list)
            # print(len(booleans))
            # print(len(anomalies))
            for idx in anomalies:
                booleans[idx] = "TRUE"

            # Rahim added this
            the_tag = tag
            if data_set_type == "test":
                if current < fault_injection_minute - 1:
                    the_tag = "failurefree_none"

            lines.append("{booleans}, {tag}".format(
                booleans=', '.join(booleans), tag=the_tag))

    with open(arff_path, 'w') as f:
        f.write('\n'.join(lines))
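
A minimal sketch of the sliding-window aggregation used above (window length 3, fromzero behaviour); note that, exactly as in the loop above, the slice data[start:current] excludes the current timestamp. The per-timestamp anomaly sets are made up:

from functools import reduce

data = [{1}, {2}, {1, 3}, {4}]
sliding_window = 3
for current in range(len(data)):
    start = max(0, current + 1 - sliding_window)
    window = reduce(lambda s1, s2: s1.union(s2),
                    [set(x) for x in data[start:current]], set())
    print(current, sorted(window))
# -> 0 [], 1 [1], 2 [1, 2], 3 [1, 2, 3]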