Example no. 1
 def read_stats(settings, bpmn, rep):
     """Reads the simulation results stats
     Args:
         settings (dict): Path to jar and file names
         rep (int): repetition number
     """
     m_settings = dict()
     m_settings['output'] = settings['output']
     m_settings['file'] = settings['file']
     column_names = {'resource': 'user'}
     m_settings['read_options'] = settings['read_options']
     m_settings['read_options']['timeformat'] = '%Y-%m-%d %H:%M:%S.%f'
     m_settings['read_options']['column_names'] = column_names
     temp = lr.LogReader(
         os.path.join(
             m_settings['output'], 'sim_data',
             m_settings['file'].split('.')[0] + '_' + str(rep + 1) +
             '.csv'), m_settings['read_options'])
     process_graph = gph.create_process_structure(bpmn)
     results_replayer = rpl.LogReplayer(process_graph,
                                        temp,
                                        settings,
                                        source='simulation',
                                        run_num=rep + 1)
     temp_stats = results_replayer.process_stats
     temp_stats['role'] = temp_stats['resource']
     return temp_stats
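A minimal usage sketch, assuming settings and bpmn have been prepared as in the other examples; the helper name collect_sim_stats and the repetition count are hypothetical. It reads the per-repetition stats and concatenates them into a single DataFrame.

import pandas as pd

def collect_sim_stats(settings, bpmn, repetitions):
    # Gather the stats of every simulation run (1..repetitions) into one frame
    frames = [pd.DataFrame(read_stats(settings, bpmn, rep))
              for rep in range(repetitions)]
    return pd.concat(frames, ignore_index=True, sort=False)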
Example no. 2
def read_log(parms):
    parms['read_options']['filter_d_attrib'] = True
    log = lr.LogReader(os.path.join('input_files', parms['file_name']),
                       parms['read_options'])
    log_df = pd.DataFrame(log.data)
    log_df['end_time'] = log_df['end_timestamp'].astype(np.int64) // 10**9
    if parms['one_timestamp']:
        log_df = log_df.to_dict('records')
        log_df = sorted(log_df, key=lambda x: x['caseid'])
        for key, group in itertools.groupby(log_df, key=lambda x: x['caseid']):
            events = list(group)
            events = sorted(events, key=itemgetter('end_timestamp'))
            for i in range(0, len(events)):
                # In the one-timestamp approach the first activity of the trace
                # is treated as an instant, since there is no previous timestamp
                # from which to derive a range
                if i == 0:
                    events[i]['start_time'] = events[i]['end_time'] - 1
                else:
                    events[i]['start_time'] = events[i - 1]['end_time']
        log_df = pd.DataFrame(log_df)
    else:
        log_df['start_time'] = log_df['start_timestamp'].astype(
            np.int64) // 10**9
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)
    log_df.sort_values(by=['caseid', 'start_time'],
                       ascending=[True, True],
                       inplace=True)
    log_df.reset_index(inplace=True, drop=True)
    log_df['event_id'] = log_df.index
    return log_df
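A self-contained sketch of the one-timestamp rule applied above, on a hypothetical three-event trace: each event's start time is taken from the previous event's end time, and the first event of the trace becomes a one-second instant.

import itertools
from operator import itemgetter

trace = [{'caseid': 'C1', 'task': 'A', 'end_time': 100},
         {'caseid': 'C1', 'task': 'B', 'end_time': 160},
         {'caseid': 'C1', 'task': 'C', 'end_time': 200}]
for _, group in itertools.groupby(sorted(trace, key=itemgetter('caseid')),
                                  key=itemgetter('caseid')):
    events = sorted(group, key=itemgetter('end_time'))
    for i, event in enumerate(events):
        # First event has no predecessor, so it is treated as lasting one second
        event['start_time'] = events[i - 1]['end_time'] if i else event['end_time'] - 1
print([e['start_time'] for e in trace])  # [99, 100, 160]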
Example no. 3
def training_model(parameters, timeformat, no_loops=False):
    """Main method of the embedding training module.
    Args:
        parameters (dict): parameters for training the embedding network.
        timeformat (str): event-log date-time format.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    log = lr.LogReader(os.path.join('input_files', parameters['file_name']),
                       timeformat,
                       timeformat,
                       one_timestamp=True)

    # Pre-processing tasks
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)
    # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)

    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Define the number of dimensions as the 4th root of the number of categories
    dim_number = math.ceil(
        len(
            list(
                itertools.product(
                    *[list(ac_index.items()),
                      list(rl_index.items())])))**0.25)

    ac_weights, rl_weights = train_embedded(log_df, ac_index, rl_index,
                                            dim_number)

    sup.create_file_from_list(
        reformat_matrix(index_ac, ac_weights),
        os.path.join(os.path.join('input_files', 'embedded_matix'),
                     'ac_' + parameters['file_name'].split('.')[0] + '.emb'))
    sup.create_file_from_list(
        reformat_matrix(index_rl, rl_weights),
        os.path.join(os.path.join('input_files', 'embedded_matix'),
                     'rl_' + parameters['file_name'].split('.')[0] + '.emb'))
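The nested itertools.product call above is only used to count activity-role combinations, so the embedding dimension reduces to the fourth root of the product of the two vocabulary sizes; a quick sketch with hypothetical sizes:

import math

ac_size, rl_size = 12, 5                       # hypothetical vocabulary sizes
dim_number = math.ceil((ac_size * rl_size) ** 0.25)
print(dim_number)                              # ceil(60 ** 0.25) -> 3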
Example no. 4
 def load_log(params):
     params['read_options']['filter_d_attrib'] = False
     log = lr.LogReader(os.path.join('input_files', params['file_name']),
                        params['read_options'])
     log_df = pd.DataFrame(log.data)
     if set(['Unnamed: 0', 'role']).issubset(set(log_df.columns)):
         log_df.drop(columns=['Unnamed: 0', 'role'], inplace=True)
     log_df = log_df[~log_df.task.isin(['Start', 'End'])]
     return log_df
Example no. 5
def measure_stats(settings, bpmn, rep):
    """Executes BIMP Simulations.
    Args:
        settings (dict): Path to jar and file names
        rep (int): repetition number
    """
    timeformat = '%Y-%m-%d %H:%M:%S.%f'
    temp = lr.LogReader(os.path.join(settings['output'], 'sim_data',
                                     settings['file'].split('.')[0] + '_'+str(rep + 1)+'.csv'),
                        timeformat)
    process_graph = gph.create_process_structure(bpmn)
    _, _, temp_stats = rpl.replay(process_graph, temp, source='simulation', run_num=rep + 1)
    temp_stats = pd.DataFrame.from_records(temp_stats)
    temp_stats['role'] = temp_stats['resource']
    return temp_stats
Example no. 6
def read_inputs(timeformat,
                log_columns_numbers,
                log_file_name,
                bpmn_file_name,
                ns_include=True):
    # Reading and parsing of config file
    log, bpmn = None, None
    try:
        log = lr.LogReader(log_file_name, timeformat, timeformat,
                           log_columns_numbers, ns_include)
        bpmn = br.BpmnReader(bpmn_file_name)
    except IOError as e:
        print('Input error ' + str(e))
    except Exception as e:
        print('Unexpected error...' + '\n' + str(e))
    return log, bpmn
Example no. 7
 def read_inputs(self) -> None:
     # Output folder creation
     if not os.path.exists(self.settings['output']):
         os.makedirs(self.settings['output'])
         os.makedirs(os.path.join(self.settings['output'], 'sim_data'))
     # Event log reading
     self.log = lr.LogReader(
         os.path.join(self.settings['input'], self.settings['file']),
         self.settings['read_options'])
     # Create customized event-log for the external tools
     xes.XesWriter(self.log, self.settings)
     # Execution steps
     self.mining_structure(self.settings)
     self.bpmn = br.BpmnReader(
         os.path.join(self.settings['output'],
                      self.settings['file'].split('.')[0] + '.bpmn'))
     self.process_graph = gph.create_process_structure(self.bpmn)
Example no. 8
File: simod.py  Project: dtdi/Simod
 def read_inputs(self, **kwargs) -> None:
     # Output folder creation
     if not os.path.exists(self.settings['output']):
         os.makedirs(self.settings['output'])
         os.makedirs(os.path.join(self.settings['output'], 'sim_data'))
     # Event log reading
     self.log = lr.LogReader(
         os.path.join(self.settings['input'], self.settings['file']),
         self.settings['read_options'])
     # Time splitting 80-20
     self.split_timeline(0.2,
                         self.settings['read_options']['one_timestamp'])
     # Create customized event-log for the external tools
     xes.XesWriter(self.log, self.settings)
     # Execution steps
     self.mining_structure(self.settings)
     self.bpmn = br.BpmnReader(
         os.path.join(self.settings['output'],
                      self.settings['file'].split('.')[0] + '.bpmn'))
     self.process_graph = gph.create_process_structure(self.bpmn)
     # Replaying test partition
     print("-- Reading test partition --")
     try:
         test_replayer = rpl.LogReplayer(
             self.process_graph,
             self.get_traces(
                 self.log_test,
                 self.settings['read_options']['one_timestamp']),
             self.settings)
         self.process_stats = test_replayer.process_stats
         self.process_stats = pd.DataFrame.from_records(self.process_stats)
         self.log_test = test_replayer.conformant_traces
     except AssertionError as e:
         print(e)
         self.status = STATUS_FAIL
         print("-- End of trial --")
Example no. 9
def single_exec(settings):
    """Main aplication method"""
    # Read settings from config file
    settings = read_settings(settings)
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Copy event-log to output folder
    copyfile(os.path.join(settings['input'], settings['file']),
             os.path.join(settings['output'], settings['file']))
    # Event log reading
    log = lr.LogReader(os.path.join(settings['output'], settings['file']),
                       settings['timeformat'])
    # Execution steps
    mining_structure(settings, settings['epsilon'], settings['eta'])
    bpmn = br.BpmnReader(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'))
    process_graph = gph.create_process_structure(bpmn)

    # Evaluate alignment
    chk.evaluate_alignment(process_graph, log, settings)

    print("-- Mining Simulation Parameters --")
    parameters, process_stats = par.extract_parameters(log, bpmn, process_graph)
    xml.print_parameters(os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         os.path.join(settings['output'],
                                      settings['file'].split('.')[0]+'.bpmn'),
                         parameters)
    response = list()
    status = 'ok'
    sim_values = list()
    if settings['simulation']:
#        if settings['analysis']:
        process_stats = pd.DataFrame.from_records(process_stats)
        for rep in range(settings['repetitions']):
            print("Experiment #" + str(rep + 1))
            try:
                simulate(settings, rep)
                process_stats = process_stats.append(measure_stats(settings,
                                                                   bpmn, rep),
                                                     ignore_index=True,
                                                     sort=False)
                sim_values.append(gen.mesurement(process_stats, settings, rep))
            except Exception:
                status = 'fail'
                break
    data = {'alg_manag': settings['alg_manag'],
            'epsilon': settings['epsilon'],
            'eta': settings['eta'],
            'output': settings['output']
            }

    if status == 'ok':
        loss = (1 - np.mean([x['act_norm'] for x in sim_values]))
        if loss < 0:
            response.append({**{'loss': loss, 'status': 'fail'}, **data})
        else:
            response.append({**{'loss': loss, 'status': status}, **data})
    else:
        response.append({**{'loss': 1, 'status': status}, **data})

    return response
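For clarity, the loss used above is one minus the mean act_norm similarity over the simulated repetitions; a quick numeric check with made-up scores:

import numpy as np

sim_values = [{'act_norm': 0.82}, {'act_norm': 0.78}, {'act_norm': 0.80}]  # hypothetical scores
loss = 1 - np.mean([x['act_norm'] for x in sim_values])
print(round(loss, 2))  # 0.2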
Example no. 10
def training_model(timeformat, args, no_loops=False):
    """Main method of the training module.
    Args:
        timeformat (str): event-log date-time format.
        args (dict): parameters for training the network.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    parameters = dict()
    log = lr.LogReader(os.path.join('input_files', args['file_name']),
                       timeformat,
                       timeformat,
                       one_timestamp=True)
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)
    # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)

    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Load embedded matrix
    ac_weights = load_embedded(
        index_ac, 'ac_' + args['file_name'].split('.')[0] + '.emb')
    rl_weights = load_embedded(
        index_rl, 'rl_' + args['file_name'].split('.')[0] + '.emb')
    # Calculate relative times
    log_df = add_calculated_features(log_df, ac_index, rl_index)
    # Split validation datasets
    log_df_train, log_df_test = nsup.split_train_test(log_df, 0.3)  # 70%/30%
    # Input vectorization
    vec = vectorization(log_df_train, ac_index, rl_index, args)
    # Parameters export
    output_folder = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))

    parameters['event_log'] = args['file_name']
    parameters['exp_desc'] = args
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    parameters['dim'] = dict(samples=str(vec['prefixes']['x_ac_inp'].shape[0]),
                             time_dim=str(
                                 vec['prefixes']['x_ac_inp'].shape[1]),
                             features=str(len(ac_index)))
    parameters['max_tbtw'] = vec['max_tbtw']

    sup.create_json(
        parameters,
        os.path.join(output_folder, 'parameters', 'model_parameters.json'))
    sup.create_csv_file_header(
        log_df_test.to_dict('records'),
        os.path.join(output_folder, 'parameters', 'test_log.csv'))

    if args['model_type'] == 'joint':
        mj.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared':
        msh.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'specialized':
        msp.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'concatenated':
        mcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared_cat':
        mshcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
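The closing if/elif chain routes the chosen model type to its training module; an equivalent lookup-table sketch using the same module aliases:

trainers = {'joint': mj.training_model,
            'shared': msh.training_model,
            'specialized': msp.training_model,
            'concatenated': mcat.training_model,
            'shared_cat': mshcat.training_model}
trainers[args['model_type']](vec, ac_weights, rl_weights, output_folder, args)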