def __init__(self, settings, log):
    """constructor"""
    self.space = self.define_search_space(settings)
    # Read inputs
    self.log = log
    self._split_timeline(0.8, settings['read_options']['one_timestamp'])
    self.org_log = copy.deepcopy(log)
    self.org_log_train = copy.deepcopy(self.log_train)
    self.org_log_valdn = copy.deepcopy(self.log_valdn)
    # Load settings
    self.settings = settings
    self.temp_output = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(self.temp_output):
        os.makedirs(self.temp_output)
    # Results file
    self.file_name = os.path.join(self.temp_output,
                                  sup.file_id(prefix='OP_'))
    if not os.path.exists(self.file_name):
        open(self.file_name, 'w').close()
    # Trials object to track progress
    self.bayes_trials = Trials()
    self.best_output = None
    self.best_parms = dict()
    self.best_similarity = 0
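# Hedged usage sketch (not from the original class): the search space and
# Trials() object initialized above are typically fed to hyperopt's fmin to
# drive the Bayesian optimization. The objective name `_optimize_trial` is an
# assumption; the fmin call itself is hyperopt's documented API.
from hyperopt import fmin, tpe

def _run_bayesian_search(self, max_evals=50):
    # Minimize the objective over self.space, recording every trial
    best = fmin(fn=self._optimize_trial,   # assumed objective: returns {'loss': ..., 'status': ...}
                space=self.space,
                algo=tpe.suggest,          # Tree-structured Parzen Estimator
                max_evals=max_evals,
                trials=self.bayes_trials)
    return best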
def _temp_path_redef(self, settings, **kwargs) -> dict:
    # Paths redefinition
    settings['output'] = os.path.join(self.temp_output, sup.folder_id())
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
    return settings
def __init__(self, args):
    """constructor"""
    self.__id = sup.folder_id()
    self.__status = St.HOLDING
    self.__worker_id = None
    self.__args = args
    # Register the new job in the class-level index
    Job.id_index[self.__id].append(self)
def create_worker(self, job_id):
    job = Job.find_by_id(job_id)
    exp_name = 'worker'
    default = [
        '#!/bin/bash',
        '#SBATCH --partition=' + self.conn['partition'],
        '#SBATCH -J ' + exp_name,
        '#SBATCH --output="' + os.path.join(self.stdout_folder, 'slurm-%j.out') + '"',
        '#SBATCH -N 1',
        '#SBATCH --cpus-per-task=' + self.conn['cpus'],
        '#SBATCH --mem=' + self.conn['mem'],
        '#SBATCH -t 72:00:00',
        'module load cuda/10.0',
        'module load python/3.6.3/virtenv',
        'module load java-1.8.0_40',
        'source deactivate',
        'source activate ' + self.conn['env']
    ]

    def format_option(short, parm):
        # Treat None, empty strings, and NaN as missing values
        # (a plain `parm in [..., np.nan]` test misses NaN copies,
        # since NaN compares unequal to itself)
        missing = (parm is None or parm in ['nan', '']
                   or (isinstance(parm, float) and np.isnan(parm)))
        return ' -' + short + (' None' if missing else ' ' + str(parm))

    options = 'python ' + self.conn['script']
    for k, v in job.get_args().items():
        options += format_option(k, v)
    if self.output_folder:
        options += format_option('o', self.output_folder)
    # options += ' -a training'
    default.append(options)
    file_name = os.path.join(self.jobs_folder, sup.folder_id())
    sup.create_text_file(default, file_name)
    return self.submit_job(file_name)
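# For reference, with hypothetical conn values (partition='main', cpus='4',
# mem='8000', env='deep_sim3') the worker script written above would look like:
#
#   #!/bin/bash
#   #SBATCH --partition=main
#   #SBATCH -J worker
#   #SBATCH --output="<stdout_folder>/slurm-%j.out"
#   #SBATCH -N 1
#   #SBATCH --cpus-per-task=4
#   #SBATCH --mem=8000
#   #SBATCH -t 72:00:00
#   module load cuda/10.0
#   module load python/3.6.3/virtenv
#   module load java-1.8.0_40
#   source deactivate
#   source activate deep_sim3
#   python <script> -<k> <v> ... -o <output_folder>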
def _temp_path_creation() -> str:
    # Paths redefinition (annotation fixed: the function returns the path)
    temp_path = os.path.join('output_files', sup.folder_id())
    # Output folder creation
    if not os.path.exists(temp_path):
        os.makedirs(temp_path)
    return temp_path
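# Minimal usage sketch (assumed caller, not in the original module): the
# unique temp folder is usually removed once its artifacts are consumed.
temp_path = _temp_path_creation()
try:
    ...  # write intermediate files under temp_path
finally:
    shutil.rmtree(temp_path, ignore_errors=True)  # discard temporaries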
def __init__(self, params):
    """constructor"""
    self.log = self.load_log(params)
    # Split validation partitions
    self.log_train = pd.DataFrame()
    self.log_test = pd.DataFrame()
    # Activities and roles indexes
    self.ac_index = dict()
    self.index_ac = dict()
    self.rl_index = dict()
    self.index_rl = dict()
    # Training examples
    self.examples = dict()
    # Embedded dimensions
    self.ac_weights = list()
    self.rl_weights = list()
    # Preprocess the event-log
    self.preprocess(params)
    # Train model
    params['output'] = os.path.join('output_files', sup.folder_id())
    if params['opt_method'] == 'rand_hpc':
        optimizer = hpc_op.ModelHPCOptimizer(params, self.log,
                                             self.ac_index, self.rl_index)
        optimizer.execute_trials()
    elif params['opt_method'] == 'bayesian':
        optimizer = op.ModelOptimizer(params, self.log,
                                      self.ac_index, self.ac_weights,
                                      self.rl_index, self.rl_weights)
        optimizer.execute_trials()
    else:
        # Guard added: otherwise `optimizer` would be undefined below
        raise ValueError('Unsupported opt_method: ' + str(params['opt_method']))
    # Export results
    output_path = os.path.join('output_files', sup.folder_id())
    shutil.copytree(optimizer.best_output, output_path)
    shutil.copy(optimizer.file_name, output_path)
    self.export_parms(output_path, optimizer.best_parms)
    # Remove temporary folder
    shutil.rmtree(params['output'])
def __init__(self, ia_train, ia_valdn, parms):
    """constructor"""
    self.temp_output = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(self.temp_output):
        os.makedirs(self.temp_output)
    self.ia_train = ia_train
    self.ia_valdn = ia_valdn
    self.parms = parms
    self.model_metadata = dict()
    self.is_safe = True
    self._load_model()
def __init__(self, log, valdn, parms):
    """constructor"""
    self.temp_output = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(self.temp_output):
        os.makedirs(self.temp_output)
    self.log = pd.DataFrame(log.data)
    self.valdn = valdn
    self.parms = parms
    self.model_metadata = dict()
    self.is_safe = True
    self._load_model()
def _temp_path_redef(self, settings, **kwargs) -> dict:
    # Paths redefinition (annotation fixed: the settings dict is returned)
    settings['output'] = os.path.join(self.temp_output, sup.folder_id())
    if settings['alg_manag'] == 'repair':
        settings['aligninfo'] = os.path.join(settings['output'],
                                             'CaseTypeAlignmentResults.csv')
        settings['aligntype'] = os.path.join(settings['output'],
                                             'AlignmentStatistics.csv')
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Create customized event-log for the external tools
    xes.XesWriter(self.log_train, settings)
    return settings
def execute_pipeline(self) -> None:
    exec_times = dict()
    self.is_safe = self._read_inputs(log_time=exec_times, is_safe=self.is_safe)
    # Modify number of instances in the model
    num_inst = len(self.log_test.caseid.unique())
    # Get minimum date
    start_time = (self.log_test.start_timestamp.min()
                  .strftime("%Y-%m-%dT%H:%M:%S.%f+00:00"))
    print('############ Structure optimization ############')
    # Structure optimization
    seq_gen = sg.SeqGenerator({**self.parms['gl'], **self.parms['s_gen']},
                              self.log_train)
    print('############ Generate interarrivals ############')
    self.is_safe = self._read_bpmn(log_time=exec_times, is_safe=self.is_safe)
    generator = gen.InstancesGenerator(self.process_graph,
                                       self.log_train,
                                       self.parms['i_gen']['gen_method'],
                                       {**self.parms['gl'], **self.parms['i_gen']})
    print('########### Generate instances times ###########')
    times_allocator = ta.TimesGenerator(self.process_graph,
                                        self.log_train,
                                        {**self.parms['gl'], **self.parms['t_gen']})
    output_path = os.path.join('output_files', sup.folder_id())
    for rep_num in range(0, self.parms['gl']['exp_reps']):
        seq_gen.generate(num_inst, start_time)
        # TODO: remove this, it is only for testing
        if self.parms['i_gen']['gen_method'] == 'test':
            iarr = generator.generate(self.log_test, start_time)
        else:
            iarr = generator.generate(num_inst, start_time)
        event_log = times_allocator.generate(seq_gen.gen_seqs, iarr)
        event_log = pd.DataFrame(event_log)
        # Export log
        self._export_log(event_log, output_path, rep_num)
        # Evaluate log
        if self.parms['gl']['evaluate']:
            self.sim_values.extend(
                self._evaluate_logs(self.parms, self.log_test,
                                    event_log, rep_num))
    self._export_results(output_path)
    print("-- End of trial --")
def _discover_model(self, **kwargs):
    # Indexes creation
    self.ac_index, self.index_ac = self._indexing(self.log.data, 'task')
    self.usr_index, self.index_usr = self._indexing(self.log.data, 'user')
    # Replay
    self._replay_process()
    if self.parms['model_type'] in ['inter', 'dual_inter', 'inter_nt']:
        self._add_intercases()
    self._split_timeline(0.8, self.one_timestamp)
    self.log_train = self._add_calculated_times(self.log_train)
    self.log_valdn = self._add_calculated_times(self.log_valdn)
    # Add index to the event log
    ac_idx = lambda x: self.ac_index[x['task']]
    self.log_train['ac_index'] = self.log_train.apply(ac_idx, axis=1)
    self.log_valdn['ac_index'] = self.log_valdn.apply(ac_idx, axis=1)
    if self.parms['model_type'] in ['inter_nt', 'dual_inter']:
        ac_idx = lambda x: self.ac_index[x['n_task']]
        self.log_train['n_ac_index'] = self.log_train.apply(ac_idx, axis=1)
        self.log_valdn['n_ac_index'] = self.log_valdn.apply(ac_idx, axis=1)
    # Load embedding matrices
    emb_trainer = em.EmbeddingTrainer(self.parms,
                                      pd.DataFrame(self.log),
                                      self.ac_index, self.index_ac,
                                      self.usr_index, self.index_usr)
    self.ac_weights = emb_trainer.load_embbedings()
    # Scale features
    self._transform_features()
    # Optimizer
    self.parms['output'] = os.path.join('output_files', sup.folder_id())
    if self.parms['opt_method'] == 'rand_hpc':
        times_optimizer = hpc_op.ModelHPCOptimizer(self.parms,
                                                   self.log_train,
                                                   self.log_valdn,
                                                   self.ac_index,
                                                   self.ac_weights)
        times_optimizer.execute_trials()
    elif self.parms['opt_method'] == 'bayesian':
        times_optimizer = to.TimesModelOptimizer(self.parms,
                                                 self.log_train,
                                                 self.log_valdn,
                                                 self.ac_index,
                                                 self.ac_weights)
        times_optimizer.execute_trials()
    else:
        # Guard added: otherwise `times_optimizer` would be undefined below
        raise ValueError('Unsupported opt_method: ' + str(self.parms['opt_method']))
    return times_optimizer
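# Hedged sketch of the two-way indexing used by _discover_model above; the
# real _indexing is defined elsewhere in the class, so this is only an
# illustration of the idea (categorical value <-> integer index):
def _indexing(self, data, column):
    values = sorted({row[column] for row in data})      # e.g. task or user names
    idx = {value: i for i, value in enumerate(values)}  # value -> index
    inv = {i: value for value, i in idx.items()}        # index -> value
    return idx, inv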
def sbatch_creator(log, miner):
    exp_name = os.path.splitext(log)[0].lower().split(' ')[0][:5]
    # imp == 2 requests the GPU partition; otherwise run on the main partition
    if imp == 2:
        default = [
            '#!/bin/bash',
            '#SBATCH --partition=gpu',
            '#SBATCH --gres=gpu:tesla:1',
            '#SBATCH -J ' + exp_name,
            '#SBATCH -N 1',
            '#SBATCH --cpus-per-task=20',
            '#SBATCH --mem=32000',
            '#SBATCH -t 120:00:00',
            'export DISPLAY=' + ip_num,
            'module load jdk-1.8.0_25',
            'module load python/3.6.3/virtenv',
            'source activate deep_sim3',
        ]
    else:
        default = [
            '#!/bin/bash',
            '#SBATCH --partition=main',
            '#SBATCH -J ' + exp_name,
            '#SBATCH -N 1',
            '#SBATCH --cpus-per-task=20',
            '#SBATCH --mem=32000',
            '#SBATCH -t 120:00:00',
            'export DISPLAY=' + ip_num,
            'module load jdk-1.8.0_25',
            'module load python/3.6.3/virtenv',
            'source activate deep_sim3',
        ]
    options = 'python pipeline.py -f ' + log
    options += ' -g False'
    options += ' -i False'
    options += ' -p False'
    options += ' -t False'
    options += ' -s True'
    options += ' -e True'
    options += ' -m ' + miner
    default.append(options)
    file_name = sup.folder_id()
    sup.create_text_file(default, os.path.join(output_folder, file_name))
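# Hypothetical driver (assumed, not in the original file): one sbatch script
# per event log / miner combination. The log names and the 'sm3' miner id are
# placeholders.
for log in ['BPI_2012.xes', 'BPI_2017.xes']:
    sbatch_creator(log, 'sm3')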