Example no. 1
def save_results(measurements, feature, parms):
    if measurements:
        if parms['is_single_exec']:
            output_route = os.path.join('output_files', parms['folder'],
                                        'results')
            model_name, _ = os.path.splitext(parms['model_file'])
            sup.create_csv_file_header(
                measurements,
                os.path.join(
                    output_route, model_name + '_' + feature + '_' +
                    parms['activity'] + '.csv'))
        else:
            if os.path.exists(
                    os.path.join(
                        'output_files',
                        feature + '_' + parms['activity'] + '.csv')):
                sup.create_csv_file(measurements,
                                    os.path.join(
                                        'output_files', feature + '_' +
                                        parms['activity'] + '.csv'),
                                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(
                        'output_files',
                        feature + '_' + parms['activity'] + '.csv'))
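All of the examples on this page funnel into the same two helpers: sup.create_csv_file_header, which writes rows preceded by a header line, and sup.create_csv_file, which writes rows without one (usually with mode='a' to append). The sup support module itself is not shown here; as a minimal sketch, assuming each row is a flat dict, the pair could look like this:

import csv

def create_csv_file_header(rows, output_file, mode='w'):
    # Hypothetical sketch: write dict rows preceded by a header line.
    with open(output_file, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

def create_csv_file(rows, output_file, mode='w'):
    # Hypothetical sketch: write dict rows without a header, so that
    # mode='a' appends cleanly to a file that already has one.
    with open(output_file, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writerows(rows)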
Example no. 2
    def mannage_results(self) -> None:
        self.response, measurements = self.define_response(
            self.status, self.sim_values, self.settings)

        if self.settings['exec_mode'] in ['optimizer', 'tasks_optimizer']:
            if os.path.getsize(
                    os.path.join('outputs', self.settings['temp_file'])) > 0:
                sup.create_csv_file(measurements,
                                    os.path.join('outputs',
                                                 self.settings['temp_file']),
                                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('outputs', self.settings['temp_file']))
        else:
            print('------ Final results ------')
            for k, v in self.response.items():
                if k != 'params':
                    print(k, v, sep=': ')
            self.response.pop('params', None)
            sup.create_csv_file_header(
                [self.response],
                os.path.join('outputs', self.settings['temp_file']))
Example no. 3
    def export_parms(self, parms):
        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)
            os.makedirs(os.path.join(self.output_folder, 'parameters'))

        parms['index_ac'] = self.index_ac
        parms['index_rl'] = self.index_rl
        if parms['model_type'] in [
                'shared_cat', 'shared_cat_inter', 'shared_cat_rd',
                'shared_cat_wl', 'shared_cat_cx', 'cnn_lstm',
                'shared_cat_city', 'shared_cat_snap', 'shared_cat_inter_full',
                'cnn_lstm_inter', 'cnn_lstm_inter_full'
        ]:
            shape = self.examples['prefixes']['activities'].shape
            parms['dim'] = dict(samples=str(shape[0]),
                                time_dim=str(shape[1]),
                                features=str(len(self.ac_index)))
        else:
            shape = self.examples['encoder_input_data']['activities'].shape
            parms['dim'] = dict(samples=str(shape[0]),
                                time_dim=str(shape[1]),
                                features=str(len(self.ac_index)))

        sup.create_json(
            parms,
            os.path.join(self.output_folder, 'parameters',
                         'model_parameters.json'))
        sup.create_csv_file_header(
            self.log_test.to_dict('records'),
            os.path.join(self.output_folder, 'parameters', 'test_log.csv'))
Example no. 4
def print_measures(self):
    """
    Prints the similarity results detail
    """
    print_path = os.path.join(self.output, 'sim_data', 'measures.csv')
    if os.path.exists(print_path):
        sup.create_csv_file(self.measures, print_path, mode='a')
    else:
        sup.create_csv_file_header(self.measures, print_path)
Example no. 5
File: simod.py Project: dtdi/Simod
def save_times(times, settings):
    times = [{**{'output': settings['output']}, **times}]
    log_file = os.path.join('outputs', 'execution_times.csv')
    if not os.path.exists(log_file):
        open(log_file, 'w').close()
    if os.path.getsize(log_file) > 0:
        sup.create_csv_file(times, log_file, mode='a')
    else:
        sup.create_csv_file_header(times, log_file)
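Two guard styles recur across these examples: os.path.exists (Examples 4, 6, 7) and os.path.getsize(...) > 0 (Examples 2, 5, 12). The size check is the safer one, because an empty file exists yet still needs a header; save_times above even pre-creates an empty file so that getsize cannot raise FileNotFoundError. Under the same assumptions about sup as in the sketch after Example 1, the recurring append-or-create idiom could be factored into one hypothetical helper:

import os

def append_or_create(rows, csv_path):
    # Hypothetical helper: append when the file already has content,
    # otherwise create it and write the header first.
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        sup.create_csv_file(rows, csv_path, mode='a')
    else:
        sup.create_csv_file_header(rows, csv_path)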
Example no. 6
def on_train_end(self, logs={}):
    log_file = os.path.join('output_files', 'training_times.csv')
    data = [{
        'output_folder': self.output_folder,
        'train_epochs': len(self.logs),
        'avg_time': np.mean(self.logs),
        'min_time': np.min(self.logs),
        'max_time': np.max(self.logs)
    }]
    if os.path.exists(log_file):
        sup.create_csv_file(data, log_file, mode='a')
    else:
        sup.create_csv_file_header(data, log_file)
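Example 6 is the tail end of a Keras callback: self.logs evidently accumulates per-epoch durations somewhere earlier in the class. A minimal sketch of the surrounding class follows; the class name, timing field, and constructor are assumptions, only the on_train_end hook comes from the example itself.

import time

from tensorflow.keras.callbacks import Callback

class TimingCallback(Callback):
    # Hypothetical wrapper class around the on_train_end hook above.
    def __init__(self, output_folder):
        super().__init__()
        self.output_folder = output_folder
        self.logs = []  # per-epoch durations in seconds

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.logs.append(time.time() - self.epoch_start)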
Example no. 7
def print_measures(settings, measurements):
    measures_path = os.path.join(settings['output'], 'sim_data',
                                 'similarity_measures.csv')
    if os.path.exists(measures_path):
        sup.create_csv_file(measurements, measures_path, mode='a')
    else:
        sup.create_csv_file_header(measurements, measures_path)
Example no. 8
def save_results(measurements, feature, is_single_exec, parameters):
    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_route,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            if os.path.exists(
                    os.path.join('output_files',
                                 'full_' + feature + '_suffix_measures.csv')):
                sup.create_csv_file(
                    measurements,
                    os.path.join('output_files',
                                 'full_' + feature + '_suffix_measures.csv'),
                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('output_files',
                                 'full_' + feature + '_suffix_measures.csv'))
Example no. 9
def analize_log_schedule(resource_table, log):
    # Define and assign schedule tables
    time_table, resource_table = create_timetables(resource_table, dtype='247')
    log_data = log.data
    for resource in resource_table:
        # Calculate worked days
        resource['w_days_intime'], resource['w_days_offtime'] = worked_days(
            resource, log_data)
        available_time = (resource['schedule']['end_hour'] -
                          resource['schedule']['start_hour']).total_seconds()
        # Calculate resource availability
        resource['ava_intime'] = available_time * resource['w_days_intime']
        resource['ava_offtime'] = available_time * resource['w_days_offtime']
    # Print availability per role
    sup.create_csv_file_header(resource_table, 'schedule.csv')
    for availability in roles_availability(resource_table):
        print(availability)
Example no. 10
def save_results(measurements, feature, is_single_exec, model_file,
                 output_folder):
    model_name, _ = os.path.splitext(model_file)
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_folder,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            if os.path.exists(
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv')):
                sup.create_csv_file(
                    measurements,
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv'),
                    mode='a')
            else:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_folder,
                                 'full_' + feature + '_suffix_measures.csv'))
Example no. 11
def predict_next(timeformat, parameters, is_single_exec=True):
    """Main function of the suffix prediction module.
    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (boolean): generate measurments stand alone or share
                    results with other runing experiments (optional)
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat

    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    if EXP['norm_method'] == 'max':
        max_tbtw = np.max(df_test.tbtw)
        norm = lambda x: x['tbtw'] / max_tbtw
        df_test['tbtw_norm'] = df_test.apply(norm, axis=1)
    elif EXP['norm_method'] == 'lognorm':
        logit = lambda x: math.log1p(x['tbtw'])
        df_test['tbtw_log'] = df_test.apply(logit, axis=1)
        max_tbtw = np.max(df_test.tbtw_log)
        norm = lambda x: x['tbtw_log'] / max_tbtw
        df_test['tbtw_norm'] = df_test.apply(norm, axis=1)

    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))

    #   Next event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 15
    }, {
        'imp': 'Arg Max',
        'rep': 1
    }]
    #   Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))

    for var in variants:
        measurements = list()
        for i in range(0, var['rep']):

            prefixes = create_pref_suf(df_test, ac_alias, rl_alias)
            prefixes = predict(model, prefixes, ac_alias, rl_alias, var['imp'])

            accuracy = (np.sum([x['ac_true']
                                for x in prefixes]) / len(prefixes))

            if is_single_exec:
                sup.create_csv_file_header(
                    prefixes,
                    os.path.join(output_route,
                                 model_name + '_rep_' + str(i) + '_next.csv'))

            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route, parameters['model_file']),
                       implementation=var['imp']),
                **{
                    'accuracy': accuracy
                },
                **EXP
            })
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_next.csv'))
            else:
                if os.path.exists(
                        os.path.join('output_files',
                                     'next_event_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join(
                                            'output_files',
                                            'next_event_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join('output_files',
                                     'next_event_measures.csv'))
Example no. 12
def objective(settings):
    """Main aplication method"""
    # Read settings from config file
    settings = read_settings(settings)
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Copy event-log to output folder
    copyfile(os.path.join(settings['input'], settings['file']),
             os.path.join(settings['output'], settings['file']))
    # Event log reading
    log = lr.LogReader(os.path.join(settings['output'], settings['file']),
                       settings['timeformat'])
    # Execution steps
    mining_structure(settings, settings['epsilon'], settings['eta'])
    bpmn = br.BpmnReader(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'))
    process_graph = gph.create_process_structure(bpmn)

    # Evaluate alignment
    chk.evaluate_alignment(process_graph, log, settings)

    print("-- Mining Simulation Parameters --")
    parameters, process_stats = par.extract_parameters(log, bpmn, process_graph)
    xml.print_parameters(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         parameters)
    response = dict()
    measurements = list()
    status = STATUS_OK
    sim_values = list()
    process_stats = pd.DataFrame.from_records(process_stats)
    for rep in range(settings['repetitions']):
        print("Experiment #" + str(rep + 1))
        try:
            simulate(settings, rep)
            process_stats = process_stats.append(measure_stats(settings,
                                                               bpmn, rep),
                                                 ignore_index=True,
                                                 sort=False)
            sim_values.append(gen.mesurement(process_stats, settings, rep))
        except:
            status = STATUS_FAIL
            break

    data = {'alg_manag': settings['alg_manag'],
            'epsilon': settings['epsilon'],
            'eta': settings['eta'],
            'output': settings['output']
            }
    if status == STATUS_OK:
        loss = (1 - np.mean([x['act_norm'] for x in sim_values]))
        if loss < 0:
            response = {'loss': loss, 'params': settings, 'status': STATUS_FAIL}
            measurements.append({**{'loss': loss, 'status': STATUS_FAIL}, **data})
        else:
            response = {'loss': loss, 'params': settings, 'status': status}
            measurements.append({**{'loss': loss, 'status': status}, **data})
    else:
        response = {'params': settings, 'status': status}
        measurements.append({**{'loss': 1, 'status': status}, **data})

    if os.path.getsize(os.path.join('outputs', settings['temp_file'])) > 0:
        sup.create_csv_file(measurements,
                            os.path.join('outputs', settings['temp_file']),
                            mode='a')
    else:
        sup.create_csv_file_header(measurements,
                                   os.path.join('outputs',
                                                settings['temp_file']))
    return response
Example no. 13
def predict_suffix(output_folder, model_file, is_single_exec=True):
    """Main function of the suffix prediction module.
    Args:
        output_folder (str): folder that holds the trained model and its
                    parameters.
        model_file (str): file name of the trained model.
        is_single_exec (boolean): generate measurements standalone or share
                    results with other running experiments (optional).
    """
    global INDEX_AC
    global INDEX_RL
    global DIM
    global EXP

    max_trace_size = 100

    output_route = os.path.join('..', 'Camargo', 'output_files', output_folder)
    model_name, _ = os.path.splitext(model_file)
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    #df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    #df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    #df_test = df_test.drop(columns=['user'])
    #df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))

    #   Next event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 2
    }, {
        'imp': 'Arg Max',
        'rep': 1
    }]

    #   Generation of predictions
    model = load_model(os.path.join(output_route, model_file))

    for var in variants:
        args = dict(df_test=df_test,
                    ac_alias=ac_alias,
                    rl_alias=rl_alias,
                    output_route=output_route,
                    model_file=model_file,
                    imp=var['imp'],
                    max_trace_size=max_trace_size)

        measurements = list()
        for i in range(0, var['rep']):
            results = execute_experiments([2, 5, 8, 10, 15, 20], model, args)
            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route, model_file),
                       implementation=var['imp']),
                **results,
                **EXP
            })
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_sufix.csv'))
            else:
                if os.path.exists(
                        os.path.join(output_route, 'sufix_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join(output_route,
                                                     'sufix_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join(output_route, 'sufix_measures.csv'))
Example no. 14
def predict(timeformat, parameters, is_single_exec=True):
    """Main function of the event log generation module.
    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (boolean): generate measurments stand alone or share
                    results with other runing experiments (optional)
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat

    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Loading of testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})

    # Loading of parameters from training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}

    #   Next event selection method and number of repetitions
    variants = [{
        'imp': 'Random Choice',
        'rep': 1
    }, {
        'imp': 'Arg Max',
        'rep': 0
    }]
    #   Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))
    df_test_log = df_test.to_dict('records')

    for var in variants:
        for _ in range(0, var['rep']):
            generated_event_log = generate_traces(model, var['imp'],
                                                  len(df_test.caseid.unique()),
                                                  200)
            sim_task = gen.gen_mesurement(df_test_log, generated_event_log,
                                          'task')
            sim_role = gen.gen_mesurement(df_test_log, generated_event_log,
                                          'user')
            if is_single_exec:
                sup.create_csv_file_header(
                    sim_task,
                    os.path.join(output_route, model_name + '_similarity.csv'))
                sup.create_csv_file_header(
                    generated_event_log,
                    os.path.join(output_route, model_name + '_log.csv'))

            # Save results
            measurements = list()
            measurements.append({
                **dict(model=os.path.join(output_route, parameters['model_file']),
                       implementation=var['imp'],
                       dl_task=np.mean([x['sim_score'] for x in sim_task]),
                       dl_user=np.mean([x['sim_score'] for x in sim_role]),
                       mae=np.mean([x['abs_err'] for x in sim_task]),
                       dlt=np.mean([x['sim_score_t'] for x in sim_task])),
                **EXP
            })
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('output_files', model_name + '_measures.csv'))
            else:
                if os.path.exists(
                        os.path.join('output_files', 'total_measures.csv')):
                    sup.create_csv_file(measurements,
                                        os.path.join('output_files',
                                                     'total_measures.csv'),
                                        mode='a')
                else:
                    sup.create_csv_file_header(
                        measurements,
                        os.path.join('output_files', 'total_measures.csv'))
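Example 14 scores the generated log against the test log through gen.gen_mesurement and then averages a per-trace sim_score. That module is not shown here; in this line of work the control-flow score is typically a normalized Damerau-Levenshtein similarity over activity sequences, so a plausible stand-in for one pair of traces is the sketch below (dl_similarity is a hypothetical name, using the optimal-string-alignment variant):

def dl_similarity(a, b):
    # Normalized Damerau-Levenshtein (OSA) similarity in [0, 1]
    # between two sequences of activity labels.
    d = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(len(a) + 1):
        d[i][0] = i
    for j in range(len(b) + 1):
        d[0][j] = j
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,          # deletion
                          d[i][j - 1] + 1,          # insertion
                          d[i - 1][j - 1] + cost)   # substitution
            if (i > 1 and j > 1 and a[i - 1] == b[j - 2]
                    and a[i - 2] == b[j - 1]):
                d[i][j] = min(d[i][j], d[i - 2][j - 2] + 1)  # transposition
    max_len = max(len(a), len(b)) or 1
    return 1 - d[len(a)][len(b)] / max_len

For instance, dl_similarity(['A', 'B', 'C'], ['A', 'C', 'B']) counts the swap as one transposition and returns 1 - 1/3, roughly 0.67.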
Example no. 15
def training_model(timeformat, args, no_loops=False):
    """Main method of the training module.
    Args:
        timeformat (str): event-log date-time format.
        args (dict): parameters for training the network.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    parameters = dict()
    log = lr.LogReader(os.path.join('input_files', args['file_name']),
                       timeformat,
                       timeformat,
                       one_timestamp=True)
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)
    # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)

    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Load embedded matrix
    ac_weights = load_embedded(
        index_ac, 'ac_' + args['file_name'].split('.')[0] + '.emb')
    rl_weights = load_embedded(
        index_rl, 'rl_' + args['file_name'].split('.')[0] + '.emb')
    # Calculate relative times
    log_df = add_calculated_features(log_df, ac_index, rl_index)
    # Split validation datasets
    log_df_train, log_df_test = nsup.split_train_test(log_df, 0.3)  # 70%/30%
    # Input vectorization
    vec = vectorization(log_df_train, ac_index, rl_index, args)
    # Parameters export
    output_folder = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))

    parameters['event_log'] = args['file_name']
    parameters['exp_desc'] = args
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    parameters['dim'] = dict(samples=str(vec['prefixes']['x_ac_inp'].shape[0]),
                             time_dim=str(
                                 vec['prefixes']['x_ac_inp'].shape[1]),
                             features=str(len(ac_index)))
    parameters['max_tbtw'] = vec['max_tbtw']

    sup.create_json(
        parameters,
        os.path.join(output_folder, 'parameters', 'model_parameters.json'))
    sup.create_csv_file_header(
        log_df_test.to_dict('records'),
        os.path.join(output_folder, 'parameters', 'test_log.csv'))

    if args['model_type'] == 'joint':
        mj.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared':
        msh.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'specialized':
        msp.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'concatenated':
        mcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared_cat':
        mshcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
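Example 15 leans on a create_index helper (also implied by index_ac in Example 3) that maps each distinct task or role label to an integer, after which 0 and len(...) are reserved for the artificial 'start' and 'end' tokens. Its body is not shown; to stay consistent with those reserved slots, a sketch would number the unique column values from 1 upward (assumed, not confirmed by the source):

def create_index(log_df, column):
    # Hypothetical sketch: map each unique value of `column` to an
    # integer starting at 1, leaving 0 free for the 'start' token.
    index = dict()
    for i, value in enumerate(sorted(log_df[column].unique()), start=1):
        index[value] = i
    return index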