Example no. 1
def on_start_opt_clicked(_):
    global ON_EXECUTION
    # "linking function with output"
    res_out.value = ''
    graph_out.clear_output()
    # what happens when we press the button
    temp_file = sup.folder_id()
    if not os.path.exists(os.path.join('outputs', temp_file)):
        open(os.path.join('outputs', temp_file), 'w').close()
    settings = {
        'file': txt_eventlog.value,
        'repetitions': sl_rep_opt.value,
        'simulation': True,
        'temp_file': temp_file
    }
    args = {
        'epsilon': sl_epsilon_range.value,
        'eta': sl_eta_range.value,
        'max_eval': sl_max_evals.value
    }
    ON_EXECUTION = True
    thread = threading.Thread(target=work, args=(temp_file, ))
    thread.start()
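    # Background thread runs work(temp_file); presumably it reports progress while the optimizer executes below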
    # Deactivate controls
    change_enablement(box_opt, True)
    results, bayes_trials = sim.hyper_execution(settings, args)
    ON_EXECUTION = False
    # Reactivate controls
    change_enablement(box_opt, False)
Example no. 2
def define_general_settings(settings):
    """ Sets the app general settings"""
    column_names = {
        'Case ID': 'caseid',
        'Activity': 'task',
        'lifecycle:transition': 'event_type',
        'Resource': 'user'
    }
    # Event-log reading options
    settings['read_options'] = {
        'timeformat': '%Y-%m-%dT%H:%M:%S.%f',
        'column_names': column_names,
        'one_timestamp': False,
        'filter_d_attrib': True,
        'ns_include': True
    }
    # Folders structure
    settings['input'] = 'inputs'
    settings['output'] = os.path.join('outputs', sup.folder_id())
    # External tools routes
    settings['miner_path'] = os.path.join('external_tools', 'splitminer',
                                          'splitminer.jar')
    settings['bimp_path'] = os.path.join('external_tools', 'bimp',
                                         'qbp-simulator-engine.jar')
    settings['align_path'] = os.path.join('external_tools', 'proconformance',
                                          'ProConformance2.jar')
    settings['aligninfo'] = os.path.join(settings['output'],
                                         'CaseTypeAlignmentResults.csv')
    settings['aligntype'] = os.path.join(settings['output'],
                                         'AlignmentStatistics.csv')
    return settings
Example no. 3
    def temp_path_redef(self) -> None:
        # Paths redefinition
        self.settings['output'] = os.path.join('outputs', sup.folder_id())
        if self.settings['alg_manag'] == 'repair':
            try:
                self.settings['aligninfo'] = os.path.join(
                    self.settings['output'], 'CaseTypeAlignmentResults.csv')
                self.settings['aligntype'] = os.path.join(
                    self.settings['output'], 'AlignmentStatistics.csv')
            except Exception as e:
                print(e)
                self.status = STATUS_FAIL
Example no. 4
def sbatch_creator(configs):
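    # Builds one SLURM sbatch script per configuration and writes it to output_folder.
    # (log, arch, imp and output_folder are presumably module-level globals in the source script)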
    for i, _ in enumerate(configs):
        if configs[i]['model_type'] in ['shared_cat', 'seq2seq']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] +
                        arch)
        elif configs[i]['model_type'] in [
                'shared_cat_inter', 'seq2seq_inter', 'shared_cat_inter_full'
        ]:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] +
                        arch + 'i')
        elif configs[i]['model_type'] in ['shared_cat_snap']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] +
                        arch + 's')
        elif configs[i]['model_type'] in ['shared_cat_city']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] +
                        arch + 'c')
        if imp == 2:
            default = [
                '#!/bin/bash', '#SBATCH --partition=gpu',
                '#SBATCH --gres=gpu:tesla:1', '#SBATCH -J ' + exp_name,
                '#SBATCH -N 1', '#SBATCH --mem=14000', '#SBATCH -t 72:00:00',
                'module load cuda/10.0', 'module load python/3.6.3/virtenv',
                'source activate lstm_pip'
            ]
        else:
            default = [
                '#!/bin/bash', '#SBATCH --partition=amd',
                '#SBATCH -J ' + exp_name, '#SBATCH -N 1',
                '#SBATCH --mem=14000', '#SBATCH -t 72:00:00',
                'module load cuda/10.0', 'module load python/3.6.3/virtenv',
                'source activate lstm_pip'
            ]

        def format_option(short, parm):
            return (' -' + short + ' None' if configs[i][parm]
                    in [None, 'nan', '', np.nan] else ' -' + short + ' ' +
                    str(configs[i][parm]))

        options = 'python lstm.py -f ' + log + ' -i ' + str(imp)
        options += ' -a training'
        options += ' -o True'
        options += format_option('l', 'lstm_act')
        options += format_option('y', 'l_sizes')
        options += format_option('d', 'dense_act')
        options += format_option('n', 'norm_method')
        options += format_option('m', 'model_type')
        options += format_option('p', 'optimizers')
        if arch == 'sh':
            options += format_option('z', 'n_sizes')

        default.append(options)
        file_name = sup.folder_id()
        sup.create_text_file(default, os.path.join(output_folder, file_name))
Example no. 5
def main(argv):
    settings = dict()
    args = dict()
    # Exec mode 'single', 'optimizer'
    settings['exec_mode'] = 'single'
    # Parameters are set manually (fixed) or caught from the console for batch operations
    if not argv:
        # Event-log filename
        settings['file'] = 'Production.xes.gz'
        settings['repetitions'] = 1
        settings['simulation'] = True
        if settings['exec_mode'] == 'single':
            # Split Miner settings [0..1]
            settings['epsilon'] = 0.7
            settings['eta'] = 0.7
            # alg_manag options: 'removal', 'replacement', 'repairment'
            settings['alg_manag'] = 'removal'
            # Single Execution
            sim.single_exec(settings)
        else:
            args['epsilon'] = [0.3, 0.7]
            args['eta'] = [0.3, 0.7]
            args['max_eval'] = 2
            settings['temp_file'] = sup.folder_id()
            # Execute optimizer
            if not os.path.exists(os.path.join('outputs', settings['temp_file'])):
                open(os.path.join('outputs', settings['temp_file']), 'w').close()
                sim.hyper_execution(settings, args)
    else:
        # Catch parameters from the console
        try:
            opts, _ = getopt.getopt(argv, "hf:e:n:m:r:",
                  ['eventlog=', "epsilon=", "eta=", "alg_manag=", "repetitions="])
            for opt, arg in opts:
                key = catch_parameter(opt)
                if key in ['epsilon','eta']:
                    settings[key] = float(arg)
                elif key == 'repetitions':
                    settings[key] = int(arg)
                else:
                    settings[key] = arg
        except getopt.GetoptError:
            print('Invalid option')
            sys.exit(2)
        settings['simulation'] = True
        sim.single_exec(settings)
Example no. 6
def read_settings(settings):
    """Catch parameters fron console or code defined"""
    config = cp.ConfigParser(interpolation=None)
    config.read("./config.ini")
    # Basic settings
    settings['input'] = config.get('FOLDERS', 'inputs')
    settings['output'] = os.path.join(config.get('FOLDERS', 'outputs'), sup.folder_id())
    settings['timeformat'] = config.get('EXECUTION', 'timeformat')
    # Conditional settings
    settings['miner_path'] = reformat_path(config.get('EXTERNAL', 'splitminer'))
    if settings['alg_manag'] == 'repairment':
        settings['align_path'] = reformat_path(config.get('EXTERNAL', 'proconformance'))
        settings['aligninfo'] = os.path.join(settings['output'],
                                             config.get('ALIGNMENT', 'aligninfo'))
        settings['aligntype'] = os.path.join(settings['output'],
                                             config.get('ALIGNMENT', 'aligntype'))
    if settings['simulation']:
        settings['bimp_path'] = reformat_path(config.get('EXTERNAL', 'bimp'))
    return settings
Example no. 7
def sbatch_creator(file_list, activity):
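    # Builds one SLURM sbatch script per input file for the given activity.
    # (imp and output_folder are presumably module-level globals in the source script)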
    exp_name = activity[:4]
    for file in file_list:
        if imp == 2:
            default = [
                '#!/bin/bash', '#SBATCH --partition=gpu',
                '#SBATCH --gres=gpu:tesla:1', '#SBATCH -J ' + exp_name,
                '#SBATCH -N 1', '#SBATCH --mem=7000', '#SBATCH -t 24:00:00',
                'module load cuda/10.0', 'module load python/3.6.3/virtenv',
                'source activate lstm_pip'
            ]
        else:
            default = [
                '#!/bin/bash', '#SBATCH --partition=main',
                '#SBATCH -J ' + exp_name, '#SBATCH -N 1', '#SBATCH --mem=7000',
                '#SBATCH -t 24:00:00', 'module load cuda/10.0',
                'module load python/3.6.3/virtenv', 'source activate lstm_pip'
            ]

        default.append('python lstm.py' + ' -a ' + activity + ' -c ' +
                       file['folder'] + ' -b "' + file['file'] + '"' +
                       ' -o True' + ' -x False' + ' -t 100')
        file_name = sup.folder_id()
        sup.create_text_file(default, os.path.join(output_folder, file_name))
Example no. 8
    def __init__(self, params):
        """constructor"""
        self.log = self.load_log(params)
        self.output_folder = os.path.join('output_files', sup.folder_id())
        # Split validation partitions
        self.log_train = pd.DataFrame()
        self.log_test = pd.DataFrame()
        # Activities and roles indexes
        self.ac_index = dict()
        self.index_ac = dict()

        self.rl_index = dict()
        self.index_rl = dict()
        # Training examples
        self.examples = dict()
        # Embedded dimensions
        self.ac_weights = list()
        self.rl_weights = list()
        # Preprocess the event-log
        self.preprocess(params)
        # Train model
        m_loader = mload.ModelLoader(params)
        m_loader.train(params['model_type'], self.examples, self.ac_weights,
                       self.rl_weights, self.output_folder)
Example no. 9
def extract_features(parms):
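    # Computes inter-case features for every prefix of the event log, merges them
    # back into the original log, and exports the result to a CSV file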
    # ADAPTATION: the output file and auxiliary file paths were customised
    fname = os.path.splitext(parms['file_name'])[0]
    output_path = os.path.join('outputs', fname + '_complete')
    aux_files_path = os.path.join('outputs', sup.folder_id())
    process_temp_folder(aux_files_path)

    # ADAPTATION: support for reading .csv event logs and for single-timestamp logs was added
    df = read_log(parms)

    # Creating L*
    # prf[0] - all prefixes
    # prf[1] - ids of prefixes, so that we know who is who
    # prf[2] - metadata of the prefix (start/end time)
    # prf[3] - timestamps
    # prf[4] - complete prefixes
    prf = create_all_prefixes(df)

    pk.dump(prf[0],
            open(os.path.join(aux_files_path, fname + 'prefixes.p'), 'wb'))
    pk.dump(prf[1], open(os.path.join(aux_files_path, fname + 'ids.p'), 'wb'))
    pk.dump(prf[2],
            open(os.path.join(aux_files_path, fname + 'intervals.p'), 'wb'))
    pk.dump(prf[3], open(os.path.join(aux_files_path, fname + 'ts.p'), 'wb'))
    pk.dump(prf[4],
            open(os.path.join(aux_files_path, fname + 'complete.p'), 'wb'))
    #pk.dump(prf[5], open('outcome.p', 'wb'))

    prefixes = list(unpk(os.path.join(aux_files_path, fname + 'prefixes.p')))
    ids = list(unpk(os.path.join(aux_files_path, fname + 'ids.p')))
    intervals = list(unpk(os.path.join(aux_files_path, fname + 'intervals.p')))
    #    ts = list(unpk(os.path.join(aux_files_path, 'ts.p')))
    complete = list(unpk(os.path.join(aux_files_path, fname + 'complete.p')))

    int_start = []
    int_end = []

    int_event_id = []

    for i in intervals:
        int_start.append(i[0])
        int_end.append(i[1])
        int_event_id.append(i[2])

    df_prefixes = pd.DataFrame({
        'id': ids,
        'event_id': int_event_id,
        'start_time': int_start,
        'end_time': int_end,
        'complete': complete
    })  #, 'outcome':outcomes})
    df_prefixes['prefix'] = ''

    for i, p in enumerate(prefixes):
        df_prefixes.at[i, 'prefix'] = p  #','.join(p)

    dataset = feature_encoding_new(df_prefixes)
    # ADAPTATION: the outcome Y was removed since it is not used in our approach;
    # additionally, the features of all prefixes were calculated
    intercase_df = pd.DataFrame(dataset,
                                columns=[
                                    'id', 'event_id', 'elapsed', 'lasttask',
                                    'l1', 'l2', 'l3', 'city1', 'city2',
                                    'city3', 'city4', 'city5', 'snap1',
                                    'snap2', 'snap3', 'snap4', 'snap5'
                                ])

    # ADAPTATION: the merge with the original event log was included
    df = df.merge(intercase_df, on='event_id', how='left')
    df = df.drop(
        ['end_time', 'start_time', 'event_id', 'id', 'elapsed', 'lasttask'],
        axis=1)
    df.to_csv(output_path + '.csv', header=True)

    process_temp_folder(aux_files_path)
    os.rmdir(aux_files_path)
Example no. 10
    def train(path_trainings_log, event_log_name, id_key_for_log):
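        # Builds n-gram prefix/next-event vectors from the training log and trains the selected model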
        # Load the training set
        log_train = pd.read_csv(path_trainings_log, encoding='unicode_escape')
        ac_index = dict(activities_df.values.tolist())
        rl_index = dict(roles_df.values.tolist())

        index_ac = {v: k for k, v in ac_index.items()}
        index_rl = {v: k for k, v in rl_index.items()}

        equi = {'ac_index': 'activities',
                'rl_index': 'roles',
                'dur_norm': 'times'}
        columns = list(equi.keys())
        vec = {'prefixes': dict(),
               'next_evt': dict(),
               'max_dur': np.max(log_train.dur)}

        temp_data = list()
        log_df = log_train.to_dict('records')
        key = 'end_timestamp'
        log_df = sorted(log_df, key=lambda x: (x['caseid'], key))

        for key, group in itertools.groupby(log_df, key=lambda x: x['caseid']):
            trace = list(group)
            temp_dict = dict()

            for x in columns:
                serie = [y[x] for y in trace]
                if x == 'ac_index':
                    serie.insert(0, ac_index[('Start')])
                    serie.append(ac_index[('End')])
                elif x == 'rl_index':
                    serie.insert(0, rl_index[('Start')])
                    serie.append(rl_index[('End')])
                else:
                    serie.insert(0, 0)
                    serie.append(0)
                temp_dict = {**{x: serie}, **temp_dict}
            temp_dict = {**{'caseid': key}, **temp_dict}
            temp_data.append(temp_dict)

        # n-gram definition
        for i, _ in enumerate(temp_data):
            for x in columns:
                serie = list(ngrams(temp_data[i][x], parameters['n_size'],
                                    pad_left=True, left_pad_symbol=0))
                print("serie", i, x, serie)
                y_serie = [x[-1] for x in serie]
                serie = serie[:-1]
                # print("serie", i, x, serie)
                y_serie = y_serie[1:]
                # print("y_serie", i, x, y_serie)
                vec['prefixes'][equi[x]] = vec['prefixes'][equi[x]] + serie if i > 0 else serie
                vec['next_evt'][equi[x]] = vec['next_evt'][equi[x]] + y_serie if i > 0 else y_serie

        # Transform task, dur, and role prefixes into vectors
        for value in equi.values():
            vec['prefixes'][value] = np.array(vec['prefixes'][value])
            vec['next_evt'][value] = np.array(vec['next_evt'][value])

        # Reshape dur (prefixes, n-gram size, 1) i.e. time distribute
        vec['prefixes']['times'] = vec['prefixes']['times'].reshape(
            (vec['prefixes']['times'].shape[0],
             vec['prefixes']['times'].shape[1], 1))
        # one-hot encode target values
        vec['next_evt']['activities'] = ku.to_categorical(
            vec['next_evt']['activities'], num_classes=len(ac_index))
        vec['next_evt']['roles'] = ku.to_categorical(
            vec['next_evt']['roles'], num_classes=len(rl_index))

        # Load embedded matrix
        ac_weights = load_embedded(index_ac, 'ac_' + event_log_name + '.emb')
        rl_weights = load_embedded(index_rl, 'rl_' + event_log_name + '.emb')

        folder_id = sup.folder_id()+id_key_for_log
        output_folder = os.path.join('output_files', folder_id)

        # Export params
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
            os.makedirs(os.path.join(output_folder, 'parameters'))

        f = open(os.path.join(output_folder, 'description'), "w")
        f.write(path_trainings_log)
        f.write(event_log_name)
        f.write(id_key_for_log)
        f.close()

        parameters['index_ac'] = index_ac
        parameters['index_rl'] = index_rl

        if parameters['model_type'] in ['shared_cat', 'shared_cat_inter']:
            print("IF")
            parameters['dim'] = dict(
                samples=str(vec['prefixes']['activities'].shape[0]),
                time_dim=str(vec['prefixes']['activities'].shape[1]),
                features=str(len(ac_index)))
        else:
            parameters['dim'] = dict(
                samples=str(vec['encoder_input_data']['activities'].shape[0]),
                time_dim=str(vec['encoder_input_data']['activities'].shape[1]),
                features=str(len(ac_index)))
        parameters['max_dur'] = str(vec['max_dur'])

        sup.create_json(parameters, os.path.join(output_folder,
                                                 'parameters',
                                                 'model_parameters.json'))

        # Train the model
        m_loader = mload.ModelLoader(parameters)
        m_loader.train(parameters['model_type'], vec, ac_weights, rl_weights, output_folder)
Example no. 11
def training_model(timeformat, args, no_loops=False):
    """Main method of the training module.
    Args:
        timeformat (str): event-log date-time format.
        args (dict): parameters for training the network.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    parameters = dict()
    log = lr.LogReader(os.path.join('input_files', args['file_name']),
                       timeformat,
                       timeformat,
                       one_timestamp=True)
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)
    # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)

    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Load embedded matrix
    ac_weights = load_embedded(
        index_ac, 'ac_' + args['file_name'].split('.')[0] + '.emb')
    rl_weights = load_embedded(
        index_rl, 'rl_' + args['file_name'].split('.')[0] + '.emb')
    # Calculate relative times
    log_df = add_calculated_features(log_df, ac_index, rl_index)
    # Split validation datasets
    log_df_train, log_df_test = nsup.split_train_test(log_df, 0.3)  # 70%/30%
    # Input vectorization
    vec = vectorization(log_df_train, ac_index, rl_index, args)
    # Parameters export
    output_folder = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))

    parameters['event_log'] = args['file_name']
    parameters['exp_desc'] = args
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    parameters['dim'] = dict(samples=str(vec['prefixes']['x_ac_inp'].shape[0]),
                             time_dim=str(
                                 vec['prefixes']['x_ac_inp'].shape[1]),
                             features=str(len(ac_index)))
    parameters['max_tbtw'] = vec['max_tbtw']

    sup.create_json(
        parameters,
        os.path.join(output_folder, 'parameters', 'model_parameters.json'))
    sup.create_csv_file_header(
        log_df_test.to_dict('records'),
        os.path.join(output_folder, 'parameters', 'test_log.csv'))

    if args['model_type'] == 'joint':
        mj.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared':
        msh.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'specialized':
        msp.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'concatenated':
        mcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared_cat':
        mshcat.training_model(vec, ac_weights, rl_weights, output_folder, args)