Esempio n. 1
0
    def __init__(self, settings, log):
        """Set up the search space, the log partitions and the output files.

        Args:
            settings: configuration dictionary. It is passed to
                ``define_search_space`` and ``settings['read_options']
                ['one_timestamp']`` is forwarded to ``_split_timeline``.
            log: event log, kept as ``self.log`` and deep-copied into
                ``self.org_log``.
        """
        self.space = self.define_search_space(settings)
        # Read inputs
        self.log = log
        # NOTE(review): log_train / log_valdn are presumably populated by
        # _split_timeline (they are read right after it) -- confirm.
        self._split_timeline(0.8, settings['read_options']['one_timestamp'])

        # Untouched copies of the full log and of both partitions
        self.org_log = copy.deepcopy(log)
        self.org_log_train = copy.deepcopy(self.log_train)
        self.org_log_valdn = copy.deepcopy(self.log_valdn)
        # Load settings
        self.settings = settings
        self.temp_output = os.path.join('output_files', sup.folder_id())
        # exist_ok avoids the race between an existence check and the creation
        os.makedirs(self.temp_output, exist_ok=True)
        self.file_name = os.path.join(self.temp_output,
                                      sup.file_id(prefix='OP_'))
        # Results file: append mode creates the file when it is missing and
        # never truncates an existing one; the handle is closed by `with`.
        with open(self.file_name, 'a'):
            pass
        # Trials object to track progress
        self.bayes_trials = Trials()
        self.best_output = None
        self.best_parms = dict()
        self.best_similarity = 0
 def _temp_path_redef(self, settings, **kwargs) -> dict:
     """Point ``settings['output']`` at a new sub-folder and create it.

     Args:
         settings: dictionary updated in place; its ``'output'`` entry is
             replaced by a path under ``self.temp_output`` named by
             ``sup.folder_id()``.
         **kwargs: accepted but not used by this method.

     Returns:
         The same ``settings`` dictionary that was passed in.
     """
     # Paths redefinition
     settings['output'] = os.path.join(self.temp_output, sup.folder_id())
     # Output folder creation; exist_ok avoids the check-then-create race
     os.makedirs(settings['output'], exist_ok=True)
     return settings
Esempio n. 3
0
 def __init__(self, args):
     """Create a job in the HOLDING state and register it in the index.

     Args:
         args: job arguments, stored unchanged on the instance.
     """
     job_id = sup.folder_id()
     self.__id = job_id
     self.__args = args
     # A new job starts on hold and is not assigned to any worker yet
     self.__status = St.HOLDING
     self.__worker_id = None
     # NOTE(review): id_index is indexed and appended to without a prior
     # insert, so it presumably behaves like defaultdict(list) -- confirm.
     Job.id_index[job_id].append(self)
Esempio n. 4
0
    def create_worker(self, job_id):
        """Write a Slurm batch script for a job and submit it.

        The script consists of ``#SBATCH`` directives built from
        ``self.conn``, environment set-up lines and one ``python`` command
        whose options come from the job arguments.

        Args:
            job_id: identifier passed to ``Job.find_by_id``.

        Returns:
            Whatever ``self.submit_job`` returns for the generated script.
        """
        job = Job.find_by_id(job_id)
        exp_name = 'worker'
        stdout_file = os.path.join(self.stdout_folder, 'slurm-%j.out')
        script_lines = [
            '#!/bin/bash',
            '#SBATCH --partition=' + self.conn['partition'],
            '#SBATCH -J ' + exp_name,
            '#SBATCH --output="' + stdout_file + '"',
            '#SBATCH -N 1',
            '#SBATCH --cpus-per-task=' + self.conn['cpus'],
            '#SBATCH --mem=' + self.conn['mem'],
            '#SBATCH -t 72:00:00',
            'module load cuda/10.0',
            'module load python/3.6.3/virtenv',
            'module load java-1.8.0_40',
            'source deactivate',
            'source activate ' + self.conn['env'],
        ]

        def is_missing(parm):
            """Return True for None, '', 'nan' and any float NaN."""
            if parm is None:
                return True
            if isinstance(parm, str):
                return parm in ('nan', '')
            # A list membership test only matches NaN by object identity, so
            # NaN values other than the np.nan singleton need isnan().
            return isinstance(parm, (float, np.floating)) and np.isnan(parm)

        def format_option(short, parm):
            """Render one command-line option as ' -<short> <value>'."""
            value = 'None' if is_missing(parm) else str(parm)
            return ' -' + short + ' ' + value

        command_parts = ['python ' + self.conn['script']]
        command_parts.extend(
            format_option(k, v) for k, v in job.get_args().items())
        if self.output_folder:
            command_parts.append(format_option('o', self.output_folder))
        script_lines.append(''.join(command_parts))
        file_name = os.path.join(self.jobs_folder, sup.folder_id())
        sup.create_text_file(script_lines, file_name)
        return self.submit_job(file_name)
 def _temp_path_creation() -> str:
     """Create a folder under ``output_files`` and return its path.

     The folder name comes from ``sup.folder_id()``.

     Returns:
         The path of the folder, relative to the working directory.
     """
     # Paths redefinition
     temp_path = os.path.join('output_files', sup.folder_id())
     # Output folder creation; exist_ok avoids the check-then-create race
     os.makedirs(temp_path, exist_ok=True)
     return temp_path
Esempio n. 6
0
    def __init__(self, params):
        """Load and preprocess the log, run the optimizer and export results.

        Args:
            params: configuration dictionary. ``params['opt_method']`` picks
                the optimizer (``'rand_hpc'`` or ``'bayesian'``) and
                ``params['output']`` is overwritten with a working folder
                that is removed at the end.

        Raises:
            ValueError: if ``params['opt_method']`` is not a supported value.
        """
        self.log = self.load_log(params)
        # Split validation partitions
        self.log_train = pd.DataFrame()
        self.log_test = pd.DataFrame()
        # Activities and roles indexes
        self.ac_index = dict()
        self.index_ac = dict()

        self.rl_index = dict()
        self.index_rl = dict()
        # Training examples
        self.examples = dict()
        # Embedded dimensions
        self.ac_weights = list()
        self.rl_weights = list()
        # Preprocess the event-log
        self.preprocess(params)
        # Train model
        params['output'] = os.path.join('output_files', sup.folder_id())
        opt_method = params['opt_method']
        if opt_method == 'rand_hpc':
            optimizer = hpc_op.ModelHPCOptimizer(params,
                                                 self.log,
                                                 self.ac_index,
                                                 self.rl_index)
        elif opt_method == 'bayesian':
            optimizer = op.ModelOptimizer(params,
                                          self.log,
                                          self.ac_index,
                                          self.ac_weights,
                                          self.rl_index,
                                          self.rl_weights)
        else:
            # Without this branch `optimizer` stays unbound and the export
            # below fails with an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported opt_method {!r}; expected 'rand_hpc' or "
                "'bayesian'".format(opt_method))
        optimizer.execute_trials()
        # Export results
        output_path = os.path.join('output_files', sup.folder_id())
        shutil.copytree(optimizer.best_output, output_path)
        shutil.copy(optimizer.file_name, output_path)
        self.export_parms(output_path, optimizer.best_parms)
        # Remove folder
        shutil.rmtree(params['output'])
Esempio n. 7
0
 def __init__(self, ia_train, ia_valdn, parms):
     """Create a temporary output folder, store the inputs and load the model.

     Args:
         ia_train: stored as ``self.ia_train`` (presumably the training
             partition -- confirm against the caller).
         ia_valdn: stored as ``self.ia_valdn`` (presumably the validation
             partition -- confirm against the caller).
         parms: stored as ``self.parms``.
     """
     self.temp_output = os.path.join('output_files', sup.folder_id())
     # exist_ok avoids the race between an existence check and the creation
     os.makedirs(self.temp_output, exist_ok=True)
     self.ia_train = ia_train
     self.ia_valdn = ia_valdn
     self.parms = parms
     self.model_metadata = dict()
     self.is_safe = True
     # Runs last so that every attribute above is already set
     self._load_model()
Esempio n. 8
0
 def __init__(self, log, valdn, parms):
     """Create a temporary output folder, store the inputs and load the model.

     Args:
         log: object whose ``data`` attribute is converted to a DataFrame
             and stored as ``self.log``.
         valdn: stored as ``self.valdn`` (presumably the validation
             partition -- confirm against the caller).
         parms: stored as ``self.parms``.
     """
     self.temp_output = os.path.join('output_files', sup.folder_id())
     # exist_ok avoids the race between an existence check and the creation
     os.makedirs(self.temp_output, exist_ok=True)
     self.log = pd.DataFrame(log.data)
     self.valdn = valdn
     self.parms = parms
     self.model_metadata = dict()
     self.is_safe = True
     # Runs last so that every attribute above is already set
     self._load_model()
Esempio n. 9
0
 def _temp_path_redef(self, settings, **kwargs) -> dict:
     """Redirect the output paths to a new folder and write the training log.

     Args:
         settings: dictionary updated in place. ``'output'`` is replaced by
             a path under ``self.temp_output``; when
             ``settings['alg_manag']`` is ``'repair'`` the ``'aligninfo'``
             and ``'aligntype'`` entries are pointed inside that folder.
         **kwargs: accepted but not used by this method.

     Returns:
         The same ``settings`` dictionary that was passed in.
     """
     # Paths redefinition
     settings['output'] = os.path.join(self.temp_output, sup.folder_id())
     if settings['alg_manag'] == 'repair':
         settings['aligninfo'] = os.path.join(
             settings['output'], 'CaseTypeAlignmentResults.csv')
         settings['aligntype'] = os.path.join(settings['output'],
                                              'AlignmentStatistics.csv')
     # Output folder creation: makedirs builds settings['output'] as well,
     # and exist_ok guarantees sim_data exists even if the parent already did.
     os.makedirs(os.path.join(settings['output'], 'sim_data'), exist_ok=True)
     # Create customized event-log for the external tools
     xes.XesWriter(self.log_train, settings)
     return settings
 def execute_pipeline(self) -> None:
     """Generate, export and optionally evaluate the simulated event logs.

     Reads the inputs and the BPMN model, builds the sequence, interarrival
     and times generators, then repeats generation
     ``self.parms['gl']['exp_reps']`` times. Each generated log is exported
     and, when ``self.parms['gl']['evaluate']`` is truthy, its evaluation is
     appended to ``self.sim_values``.
     """
     timings = dict()
     self.is_safe = self._read_inputs(log_time=timings, is_safe=self.is_safe)
     # Number of distinct cases in the test log
     n_cases = len(self.log_test.caseid.unique())
     # Earliest start timestamp of the test log, formatted as a string
     first_start = self.log_test.start_timestamp.min()
     start_time = first_start.strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")

     print('############ Structure optimization ############')
     seq_params = {**self.parms['gl'], **self.parms['s_gen']}
     seq_gen = sg.SeqGenerator(seq_params, self.log_train)

     print('############ Generate interarrivals ############')
     self.is_safe = self._read_bpmn(log_time=timings, is_safe=self.is_safe)
     iarr_params = {**self.parms['gl'], **self.parms['i_gen']}
     generator = gen.InstancesGenerator(
         self.process_graph, self.log_train,
         self.parms['i_gen']['gen_method'], iarr_params)

     print('########### Generate instances times ###########')
     times_params = {**self.parms['gl'], **self.parms['t_gen']}
     times_allocator = ta.TimesGenerator(
         self.process_graph, self.log_train, times_params)

     output_path = os.path.join('output_files', sup.folder_id())
     for rep_num in range(self.parms['gl']['exp_reps']):
         seq_gen.generate(n_cases, start_time)
         # TODO: remove this, it is only here for testing
         use_test_log = self.parms['i_gen']['gen_method'] == 'test'
         iarr = generator.generate(
             self.log_test if use_test_log else n_cases, start_time)
         event_log = pd.DataFrame(
             times_allocator.generate(seq_gen.gen_seqs, iarr))
         # Export log
         self._export_log(event_log, output_path, rep_num)
         # Evaluate log
         if not self.parms['gl']['evaluate']:
             continue
         self.sim_values.extend(
             self._evaluate_logs(self.parms, self.log_test, event_log,
                                 rep_num))
     self._export_results(output_path)
     print("-- End of trial --")
    def _discover_model(self, **kwargs):
        """Prepare the log, load embeddings and run the times optimizer.

        Builds the activity and user indexes, replays the process, splits
        the log into training and validation partitions, adds the index
        columns, loads the embedding weights, scales the features and runs
        the optimizer chosen by ``self.parms['opt_method']``.

        Args:
            **kwargs: accepted but not used by this method.

        Returns:
            The optimizer instance after ``execute_trials()`` has run.

        Raises:
            ValueError: if ``self.parms['opt_method']`` is not
                ``'rand_hpc'`` or ``'bayesian'``.
        """
        # indexes creation
        self.ac_index, self.index_ac = self._indexing(self.log.data, 'task')
        self.usr_index, self.index_usr = self._indexing(self.log.data, 'user')
        # replay
        self._replay_process()

        if self.parms['model_type'] in ['inter', 'dual_inter', 'inter_nt']:
            self._add_intercases()
        self._split_timeline(0.8, self.one_timestamp)
        self.log_train = self._add_calculated_times(self.log_train)
        self.log_valdn = self._add_calculated_times(self.log_valdn)

        # Add index to the event log
        def task_index(row):
            return self.ac_index[row['task']]

        self.log_train['ac_index'] = self.log_train.apply(task_index, axis=1)
        self.log_valdn['ac_index'] = self.log_valdn.apply(task_index, axis=1)
        if self.parms['model_type'] in ['inter_nt', 'dual_inter']:

            def next_task_index(row):
                return self.ac_index[row['n_task']]

            self.log_train['n_ac_index'] = self.log_train.apply(
                next_task_index, axis=1)
            self.log_valdn['n_ac_index'] = self.log_valdn.apply(
                next_task_index, axis=1)
        # Load embedding matrixes
        # NOTE(review): the indexes above are built from self.log.data but the
        # trainer receives pd.DataFrame(self.log) -- confirm this is intended.
        emb_trainer = em.EmbeddingTrainer(self.parms, pd.DataFrame(self.log),
                                          self.ac_index, self.index_ac,
                                          self.usr_index, self.index_usr)
        self.ac_weights = emb_trainer.load_embbedings()
        # Scale features
        self._transform_features()
        # Optimizer
        self.parms['output'] = os.path.join('output_files', sup.folder_id())
        opt_method = self.parms['opt_method']
        if opt_method == 'rand_hpc':
            optimizer_cls = hpc_op.ModelHPCOptimizer
        elif opt_method == 'bayesian':
            optimizer_cls = to.TimesModelOptimizer
        else:
            # Without this branch the return below fails with an unrelated
            # UnboundLocalError.
            raise ValueError(
                "Unsupported opt_method {!r}; expected 'rand_hpc' or "
                "'bayesian'".format(opt_method))
        # Both optimizers take the same positional arguments
        times_optimizer = optimizer_cls(self.parms,
                                        self.log_train,
                                        self.log_valdn,
                                        self.ac_index,
                                        self.ac_weights)
        times_optimizer.execute_trials()
        return times_optimizer
def sbatch_creator(log, miner):
    """Write a Slurm batch script that runs ``pipeline.py`` on an event log.

    The script targets the GPU partition when the module-level ``imp`` equals
    2 and the ``main`` partition otherwise. It is written into the
    module-level ``output_folder`` under a name given by ``sup.folder_id()``.
    The module-level ``ip_num`` is used for the ``DISPLAY`` export.

    Args:
        log: event-log file name. It is passed to ``pipeline.py`` with
            ``-f``, and the first five characters of its lower-cased first
            word (extension removed) become the job name.
        miner: value passed to ``pipeline.py`` with ``-m``.
    """
    exp_name = os.path.splitext(log)[0].lower().split(' ')[0][:5]
    if imp == 2:
        partition = [
            '#SBATCH --partition=gpu',
            '#SBATCH --gres=gpu:tesla:1',
        ]
    else:
        partition = ['#SBATCH --partition=main']
    default = ['#!/bin/bash'] + partition + [
        '#SBATCH -J ' + exp_name,
        '#SBATCH -N 1',
        '#SBATCH --cpus-per-task=20',
        '#SBATCH --mem=32000',
        '#SBATCH -t 120:00:00',
        'export DISPLAY=' + ip_num,
        'module load jdk-1.8.0_25',
        'module load python/3.6.3/virtenv',
        'source activate deep_sim3',
    ]

    # The command is built for both partitions. It used to be defined only
    # in the non-GPU branch, which made the GPU path fail on an unbound name.
    options = ''.join([
        'python pipeline.py -f ' + log,
        ' -g False',
        ' -i False',
        ' -p False',
        ' -t False',
        ' -s True',
        ' -e True',
        ' -m ' + miner,
    ])

    default.append(options)
    file_name = sup.folder_id()
    sup.create_text_file(default, os.path.join(output_folder, file_name))