Example #1
    def manage_results(self) -> None:
        self.response, measurements = self.define_response(
            self.status, self.sim_values, self.settings)

        if self.settings['exec_mode'] in ['optimizer', 'tasks_optimizer']:
            if os.path.getsize(
                    os.path.join('outputs', self.settings['temp_file'])) > 0:
                sup.create_csv_file(measurements,
                                    os.path.join('outputs',
                                                 self.settings['temp_file']),
                                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('outputs', self.settings['temp_file']))
        else:
            print('------ Final results ------')
            for k, v in self.response.items():
                if k != 'params':
                    print(k, v, sep=': ')
            self.response.pop('params', None)
            sup.create_csv_file_header(
                [self.response],
                os.path.join('outputs', self.settings['temp_file']))
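
Every example in this listing funnels its output through two helpers from a support module sup: create_csv_file_header(rows, path), which writes a header row followed by the data, and create_csv_file(rows, path, mode='a'), which appends rows without a header. The real implementations are not shown here; a minimal sketch, assuming each row is a flat dict and all rows share the same keys, might look like this:

import csv

def create_csv_file_header(rows, path):
    # Hypothetical stand-in for sup.create_csv_file_header: derive the
    # header from the first row's keys, then write all rows.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

def create_csv_file(rows, path, mode='a'):
    # Hypothetical stand-in for sup.create_csv_file: write rows without
    # emitting a header line (mode='a' appends to an existing file).
    with open(path, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writerows(rows)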
Example #2
def save_results(measurements, feature, parms):
    if not measurements:
        return
    if parms['is_single_exec']:
        output_route = os.path.join('output_files', parms['folder'],
                                    'results')
        model_name, _ = os.path.splitext(parms['model_file'])
        sup.create_csv_file_header(
            measurements,
            os.path.join(output_route,
                         model_name + '_' + feature + '_' +
                         parms['activity'] + '.csv'))
    else:
        shared_path = os.path.join(
            'output_files', feature + '_' + parms['activity'] + '.csv')
        if os.path.exists(shared_path):
            sup.create_csv_file(measurements, shared_path, mode='a')
        else:
            sup.create_csv_file_header(measurements, shared_path)
Example #3
def print_measures(self):
    """
    Writes the detailed similarity results to a CSV file.
    """
    print_path = os.path.join(self.output, 'sim_data', 'measures.csv')
    if os.path.exists(print_path):
        sup.create_csv_file(self.measures, print_path, mode='a')
    else:
        sup.create_csv_file_header(self.measures, print_path)
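
The exists-check-then-append branching in Example #3 recurs in almost every snippet here. A hypothetical consolidation (not part of any of the original projects) would fold both branches into a single helper:

import os

def write_or_append(rows, path):
    # Hypothetical helper: emit a header only when the file is absent or
    # still empty, otherwise append; combines the os.path.exists and
    # os.path.getsize variants seen in these examples. Assumes the same
    # sup support module used throughout this listing.
    if os.path.exists(path) and os.path.getsize(path) > 0:
        sup.create_csv_file(rows, path, mode='a')
    else:
        sup.create_csv_file_header(rows, path)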
Example #4
File: simod.py Project: dtdi/Simod
def save_times(times, settings):
    times = [{**{'output': settings['output']}, **times}]
    log_file = os.path.join('outputs', 'execution_times.csv')
    # Create the file if missing; branching on size rather than mere
    # existence guarantees a pre-created empty file still gets a header.
    if not os.path.exists(log_file):
        open(log_file, 'w').close()
    if os.path.getsize(log_file) > 0:
        sup.create_csv_file(times, log_file, mode='a')
    else:
        sup.create_csv_file_header(times, log_file)
Example #5
def on_train_end(self, logs=None):
    log_file = os.path.join('output_files', 'training_times.csv')
    data = [{
        'output_folder': self.output_folder,
        'train_epochs': len(self.logs),
        'avg_time': np.mean(self.logs),
        'min_time': np.min(self.logs),
        'max_time': np.max(self.logs)
    }]
    if os.path.exists(log_file):
        sup.create_csv_file(data, log_file, mode='a')
    else:
        sup.create_csv_file_header(data, log_file)
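
Example #5 is a method of a Keras callback; self.logs evidently accumulates per-epoch training times. The enclosing class is not part of the listing; a plausible sketch, with TimingCallback and the timing bookkeeping as assumptions, follows (the on_train_end method above completes the class):

import time

import numpy as np  # used by on_train_end above
from tensorflow.keras.callbacks import Callback

class TimingCallback(Callback):
    # Hypothetical host class for the on_train_end method shown above.
    def __init__(self, output_folder):
        super().__init__()
        self.output_folder = output_folder
        self.logs = []  # per-epoch durations in seconds

    def on_epoch_begin(self, epoch, logs=None):
        self._epoch_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.logs.append(time.time() - self._epoch_start)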
Example #6
def print_measures(settings, measurements):
    measures_path = os.path.join(settings['output'], 'sim_data',
                                 'similarity_measures.csv')
    if os.path.exists(measures_path):
        sup.create_csv_file(measurements, measures_path, mode='a')
    else:
        sup.create_csv_file_header(measurements, measures_path)
Example #7
def save_results(measurements, feature, is_single_exec, parameters):
    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_route,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            shared_path = os.path.join(
                'output_files', 'full_' + feature + '_suffix_measures.csv')
            if os.path.exists(shared_path):
                sup.create_csv_file(measurements, shared_path, mode='a')
            else:
                sup.create_csv_file_header(measurements, shared_path)
Example #8
def save_results(measurements, feature, is_single_exec, model_file,
                 output_folder):
    model_name, _ = os.path.splitext(model_file)
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_folder,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            if os.path.exists(
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv')):
                sup.create_csv_file(
                    measurements,
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv'),
                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv'))
Example #9
def predict_next(timeformat, parameters, is_single_exec=True):
    """Main function of the suffix prediction module.
    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (boolean): generate measurments stand alone or share
                    results with other runing experiments (optional)
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat

    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    if EXP['norm_method'] == 'max':
        max_tbtw = np.max(df_test.tbtw)
        norm = lambda x: x['tbtw'] / max_tbtw
        df_test['tbtw_norm'] = df_test.apply(norm, axis=1)
    elif EXP['norm_method'] == 'lognorm':
        logit = lambda x: math.log1p(x['tbtw'])
        df_test['tbtw_log'] = df_test.apply(logit, axis=1)
        max_tbtw = np.max(df_test.tbtw_log)
        norm = lambda x: x['tbtw_log'] / max_tbtw
        df_test['tbtw_norm'] = df_test.apply(norm, axis=1)

    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))

    # Next-event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 15
    }, {
        'imp': 'Arg Max',
        'rep': 1
    }]
    #   Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))

    for var in variants:
        measurements = list()
        for i in range(0, var['rep']):

            prefixes = create_pref_suf(df_test, ac_alias, rl_alias)
            prefixes = predict(model, prefixes, ac_alias, rl_alias, var['imp'])

            accuracy = (np.sum([x['ac_true']
                                for x in prefixes]) / len(prefixes))

            if is_single_exec:
                sup.create_csv_file_header(
                    prefixes,
                    os.path.join(output_route,
                                 model_name + '_rep_' + str(i) + '_next.csv'))

            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route, parameters['model_file']),
                       implementation=var['imp']),
                **{
                    'accuracy': accuracy
                },
                **EXP
            })
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_next.csv'))
            else:
                if os.path.exists(
                        os.path.join('output_files',
                                     'next_event_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join(
                                            'output_files',
                                            'next_event_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join('output_files',
                                     'next_event_measures.csv'))
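
Examples #9 and #11 both call create_alias(n) before comparing predicted and ground-truth sequences. Its body is not included in this listing; a minimal sketch, assuming each activity or role index is mapped to a one-character symbol so traces can be compared as plain strings, might be:

import string

def create_alias(quantity):
    # Hypothetical sketch: give each index 0..quantity-1 a distinct
    # one-character alias drawn from letters and digits.
    characters = string.ascii_letters + string.digits
    if quantity > len(characters):
        raise ValueError('more categories than available alias characters')
    return {i: characters[i] for i in range(quantity)}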
Example #10
def objective(settings):
    """Main aplication method"""
    # Read settings from config file
    settings = read_settings(settings)
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Copy event-log to output folder
    copyfile(os.path.join(settings['input'], settings['file']),
             os.path.join(settings['output'], settings['file']))
    # Event log reading
    log = lr.LogReader(os.path.join(settings['output'], settings['file']),
                       settings['timeformat'])
    # Execution steps
    mining_structure(settings, settings['epsilon'], settings['eta'])
    bpmn = br.BpmnReader(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'))
    process_graph = gph.create_process_structure(bpmn)

    # Evaluate alignment
    chk.evaluate_alignment(process_graph, log, settings)

    print("-- Mining Simulation Parameters --")
    parameters, process_stats = par.extract_parameters(log, bpmn, process_graph)
    xml.print_parameters(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         parameters)
    response = dict()
    measurements = list()
    status = STATUS_OK
    sim_values = list()
    process_stats = pd.DataFrame.from_records(process_stats)
    for rep in range(settings['repetitions']):
        print("Experiment #" + str(rep + 1))
        try:
            simulate(settings, rep)
            process_stats = process_stats.append(measure_stats(settings,
                                                               bpmn, rep),
                                                 ignore_index=True,
                                                 sort=False)
            sim_values.append(gen.mesurement(process_stats, settings, rep))
        except Exception:
            status = STATUS_FAIL
            break

    data = {'alg_manag': settings['alg_manag'],
            'epsilon': settings['epsilon'],
            'eta': settings['eta'],
            'output': settings['output']
            }
    if status == STATUS_OK:
        loss = (1 - np.mean([x['act_norm'] for x in sim_values]))
        if loss < 0:
            response = {'loss': loss, 'params': settings, 'status': STATUS_FAIL}
            measurements.append({**{'loss': loss, 'status': STATUS_FAIL}, **data})
        else:
            response = {'loss': loss, 'params': settings, 'status': status}
            measurements.append({**{'loss': loss, 'status': status}, **data})
    else:
        response = {'params': settings, 'status': status}
        measurements.append({**{'loss': 1, 'status': status}, **data})

    temp_path = os.path.join('outputs', settings['temp_file'])
    if os.path.getsize(temp_path) > 0:
        sup.create_csv_file(measurements, temp_path, mode='a')
    else:
        sup.create_csv_file_header(measurements, temp_path)
    return response
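
objective in Example #10 returns a dict with 'loss', 'params' and 'status' keys and reports STATUS_OK or STATUS_FAIL, which is the contract hyperopt's fmin expects from an objective function. A sketch of how such an objective is typically driven (the search space below is illustrative, not taken from the project, which records alg_manag, epsilon and eta in its results):

from hyperopt import Trials, fmin, hp, tpe

# Illustrative space: the real configuration would at least cover the
# epsilon and eta miner parameters logged in the measurements above.
space = {
    'epsilon': hp.uniform('epsilon', 0.0, 1.0),
    'eta': hp.uniform('eta', 0.0, 1.0),
}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
print(best)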
Example #11
def predict_suffix(output_folder, model_file, is_single_exec=True):
    """Main function of the suffix prediction module.
    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (boolean): generate measurments stand alone or share
                    results with other runing experiments (optional)
    """
    global INDEX_AC
    global INDEX_RL
    global DIM
    global EXP

    max_trace_size = 100

    output_route = os.path.join('..', 'Camargo', 'output_files', output_folder)
    model_name, _ = os.path.splitext(model_file)
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    #df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    #df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    #df_test = df_test.drop(columns=['user'])
    #df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))

    # Next-event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 2
    }, {
        'imp': 'Arg Max',
        'rep': 1
    }]

    #   Generation of predictions
    model = load_model(os.path.join(output_route, model_file))

    for var in variants:
        args = dict(df_test=df_test,
                    ac_alias=ac_alias,
                    rl_alias=rl_alias,
                    output_route=output_route,
                    model_file=model_file,
                    imp=var['imp'],
                    max_trace_size=max_trace_size)

        measurements = list()
        for i in range(0, var['rep']):
            results = execute_experiments([2, 5, 8, 10, 15, 20], model, args)
            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route, model_file),
                       implementation=var['imp']),
                **results,
                **EXP
            })
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_sufix.csv'))
            else:
                if os.path.exists(
                        os.path.join(output_route, 'sufix_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join(output_route,
                                                     'sufix_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join(output_route, 'sufix_measures.csv'))
Example #12
def predict(timeformat, parameters, is_single_exec=True):
    """Main function of the event log generation module.
    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (bool): generate measurements standalone or share
                    results with other running experiments (optional).
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat

    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    # Next-event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 1
    }, {
        'imp': 'Arg Max',
        'rep': 0
    }]
    #   Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))
    df_test_log = df_test.to_dict('records')

    for var in variants:
        for _ in range(0, var['rep']):
            generated_event_log = generate_traces(model, var['imp'],
                                                  len(df_test.caseid.unique()),
                                                  200)
            sim_task = gen.gen_mesurement(df_test_log, generated_event_log,
                                          'task')
            sim_role = gen.gen_mesurement(df_test_log, generated_event_log,
                                          'user')
            if is_single_exec:
                sup.create_csv_file_header(
                    sim_task,
                    os.path.join(output_route, model_name + '_similarity.csv'))
                sup.create_csv_file_header(
                    generated_event_log,
                    os.path.join(output_route, model_name + '_log.csv'))

            # Save results
            measurements = list()
            measurements.append({
                **dict(model=os.path.join(output_route, parameters['model_file']),
                       implementation=var['imp'],
                       dl_task=np.mean([x['sim_score'] for x in sim_task]),
                       dl_user=np.mean([x['sim_score'] for x in sim_role]),
                       mae=np.mean([x['abs_err'] for x in sim_task]),
                       dlt=np.mean([x['sim_score_t'] for x in sim_task])),
                **EXP
            })
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('output_files', model_name + '_measures.csv'))
            else:
                if os.path.exists(
                        os.path.join('output_files', 'total_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join('output_files',
                                                     'total_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join('output_files', 'total_measures.csv'))