import copy
import json
import multiprocessing
import os
import shutil
import time
from datetime import datetime
from multiprocessing import Pool
from pickle import dump

import pandas as pd
from tqdm import tqdm

# 'sup' (JSON/file helpers) and 'so' (structure optimizer) are
# project-local modules; their exact import paths depend on the
# repository layout.


def _create_model(self, compare):
    # hours = [8]
    hours = [1, 2, 4, 8, 12]
    args = [(w, self.ia_times, self.ia_valdn, self.parms) for w in hours]
    reps = len(args)

    def pbar_async(p, msg):
        # Poll the async result and mirror its progress in a tqdm bar
        pbar = tqdm(total=reps, desc=msg)
        processed = 0
        while not p.ready():
            cprocessed = reps - p._number_left
            if processed < cprocessed:
                pbar.update(n=(cprocessed - processed))
                processed = cprocessed
            time.sleep(1)
        pbar.update(n=(reps - processed))
        p.wait()
        pbar.close()

    cpu_count = multiprocessing.cpu_count()
    w_count = reps if reps <= cpu_count else cpu_count
    pool = Pool(processes=w_count)
    # Evaluate one candidate model per window size in parallel
    p = pool.map_async(self.create_evaluate_model, args)
    pbar_async(p, 'evaluating models:')
    pool.close()
    # Keep the candidate with the lowest loss
    element = min(p.get(), key=lambda x: x['loss'])
    metadata_file = os.path.join(
        self.parms['ia_gen_path'],
        self.parms['file'].split('.')[0] + '_mpdf_meta.json')
    # Compare with an existing model, if requested
    save = True
    if compare:
        # Load the metadata of the existing model, if any
        if os.path.exists(metadata_file):
            with open(metadata_file) as file:
                data = json.load(file)
            if data['loss'] < element['loss']:
                save = False
                print('dont save')
    if save:
        self.model = element['model']
        sup.create_json(
            self.model,
            os.path.join(self.parms['ia_gen_path'],
                         self.parms['file'].split('.')[0] + '_mpdf.json'))
        # Best model parameters
        self.model_metadata['window'] = element['model']['window']
        self.model_metadata['loss'] = element['loss']
        self.model_metadata['generated_at'] = (
            datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
        sup.create_json(self.model_metadata, metadata_file)
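# Illustrative sketch, not part of the original class: the same
# map_async-plus-polling progress pattern used by pbar_async above,
# shown standalone. '_square' and its inputs are hypothetical stand-ins
# for create_evaluate_model and its argument tuples; note that
# _number_left is a private attribute of multiprocessing's MapResult.
import time
from multiprocessing import Pool

from tqdm import tqdm


def _square(x):
    time.sleep(0.1)  # simulate work
    return x * x


if __name__ == '__main__':
    args = list(range(20))
    with Pool(processes=4) as pool:
        result = pool.map_async(_square, args)
        with tqdm(total=len(args), desc='evaluating:') as pbar:
            done = 0
            while not result.ready():
                current = len(args) - result._number_left
                pbar.update(current - done)
                done = current
                time.sleep(0.5)
            pbar.update(len(args) - done)
        print(result.get())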
def export_params(self):
    configs_files = list()
    for config in self.space:
        config['ac_index'] = self.ac_index
        config['rl_index'] = self.rl_index
        conf_file = sup.file_id(prefix='CNF_', extension='.json')
        sup.create_json(
            config,
            os.path.join(self.temp_output, 'opt_parms', conf_file))
        configs_files.append(conf_file)
    self.log.to_csv(
        os.path.join(self.temp_output, 'opt_parms', 'train.csv'),
        index=False, encoding='utf-8')
    return configs_files
def _discover_model(self, compare, **kwargs):
    structure_optimizer = so.StructureOptimizer(
        self.parms, copy.deepcopy(self.log_train))
    structure_optimizer.execute_trials()
    struc_model = structure_optimizer.best_output
    best_parms = structure_optimizer.best_parms
    best_similarity = structure_optimizer.best_similarity
    metadata_file = os.path.join(
        self.parms['bpmn_models'],
        self.parms['file'].split('.')[0] + '_meta.json')
    # Compare with an existing model, if requested
    save = True
    if compare:
        # Load the metadata of the existing model, if any
        if os.path.exists(metadata_file):
            with open(metadata_file) as file:
                data = json.load(file)
            print(data['similarity'])
            if data['similarity'] > best_similarity:
                save = False
                print('dont save')
    if save:
        # Best structure mining parameters
        self.model_metadata['alg_manag'] = (
            self.parms['alg_manag'][best_parms['alg_manag']])
        self.model_metadata['gate_management'] = (
            self.parms['gate_management'][best_parms['gate_management']])
        if self.parms['mining_alg'] == 'sm1':
            self.model_metadata['epsilon'] = best_parms['epsilon']
            self.model_metadata['eta'] = best_parms['eta']
        elif self.parms['mining_alg'] == 'sm2':
            self.model_metadata['concurrency'] = best_parms['concurrency']
        self.model_metadata['similarity'] = best_similarity
        self.model_metadata['generated_at'] = (
            datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
        # Copy the best model to the destination folder
        destination = os.path.join(
            self.parms['bpmn_models'],
            self.parms['file'].split('.')[0] + '.bpmn')
        source = os.path.join(
            struc_model, self.parms['file'].split('.')[0] + '.bpmn')
        shutil.copyfile(source, destination)
        # Save metadata
        sup.create_json(self.model_metadata, metadata_file)
    # Clean the output folder
    shutil.rmtree(structure_optimizer.temp_output)
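# Hypothetical refactoring sketch (not in the original source): both
# _create_model and _discover_model repeat the same "keep the old model
# if it scores better" guard. The helper below captures that logic,
# assuming a metadata JSON holding one numeric quality value under 'key'.
import json
import os


def _should_replace(metadata_file, new_score, key, higher_is_better):
    # No previous model: always save the new one
    if not os.path.exists(metadata_file):
        return True
    with open(metadata_file) as file:
        old_score = json.load(file)[key]
    if higher_is_better:
        return new_score > old_score
    return new_score < old_score


# e.g. save = _should_replace(metadata_file, best_similarity,
#                             'similarity', higher_is_better=True)
# or   save = _should_replace(metadata_file, element['loss'],
#                             'loss', higher_is_better=False)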
def _save_model(self, metadata_file, acc):
    # Best model parameters
    self.model_metadata['loss'] = acc['loss']
    self.model_metadata['generated_at'] = (
        datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    # Copy the best model to the destination folder
    destination = os.path.join(
        self.parms['ia_gen_path'],
        self.parms['file'].split('.')[0] + '_dl.h5')
    source = os.path.join(
        self.temp_output,
        self.parms['file'].split('.')[0] + '_dl.h5')
    shutil.copyfile(source, destination)
    # Save metadata
    sup.create_json(self.model_metadata, metadata_file)
    # Clean the output folder
    shutil.rmtree(self.temp_output)
def export_parms(self, output_folder, parms):
    if not os.path.exists(os.path.join(output_folder, 'parameters')):
        os.makedirs(os.path.join(output_folder, 'parameters'))
    # Longest trace in the training log, measured in events
    parms['max_trace_size'] = int(self.log.groupby('caseid')['task']
                                  .count().max())
    parms['index_ac'] = self.index_ac
    parms['index_rl'] = self.index_rl
    sup.create_json(
        parms,
        os.path.join(output_folder, 'parameters',
                     'model_parameters.json'))
    self.log_test.to_csv(
        os.path.join(output_folder, 'parameters', 'test_log.csv'),
        index=False, encoding='utf-8')
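# Toy example (hypothetical data, not from the original source) showing
# what the max_trace_size computation above yields: the number of events
# in the longest trace of the log.
import pandas as pd

log = pd.DataFrame({
    'caseid': ['c1', 'c1', 'c2', 'c2', 'c2'],
    'task':   ['A',  'B',  'A',  'B',  'C'],
})
print(int(log.groupby('caseid')['task'].count().max()))  # -> 3 (case 'c2')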
def _save_model(self, metadata_file, times_optimizer, model_path):
    model_metadata = dict()
    # Best model metadata and training parameters
    model_metadata['loss'] = times_optimizer.best_loss
    model_metadata['generated_at'] = (
        datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    model_metadata['ac_index'] = self.ac_index
    model_metadata['usr_index'] = self.usr_index
    model_metadata['log_size'] = len(
        pd.DataFrame(self.log).caseid.unique())
    model_metadata = {**model_metadata, **times_optimizer.best_parms}
    model_name = metadata_file.replace('_meta.json', '')
    if self.parms['model_type'] in ['inter', 'dual_inter', 'inter_nt']:
        model_metadata['roles'] = self.roles
        model_metadata['roles_table'] = self.roles_table.to_dict('records')
        model_metadata['inter_mean_states'] = self.mean_states
        # Save inter-case scalers
        with open(model_name + '_inter_scaler.pkl', 'wb') as file:
            dump(self.inter_scaler, file)
        if self.parms['model_type'] == 'dual_inter':
            with open(model_name + '_end_inter_scaler.pkl', 'wb') as file:
                dump(self.end_inter_scaler, file)
    # Save models
    if isinstance(model_path, tuple):
        # Dual models: processing and waiting models are saved separately
        shutil.copyfile(
            os.path.join(times_optimizer.best_output,
                         os.path.split(model_path[0])[1]),
            self.proc_model_path)
        shutil.copyfile(
            os.path.join(times_optimizer.best_output,
                         os.path.split(model_path[1])[1]),
            self.wait_model_path)
    else:
        # Copy the best model to the destination folder
        source = os.path.join(
            times_optimizer.best_output,
            self.parms['file'].split('.')[0] + '.h5')
        shutil.copyfile(source, self.model_path)
    # Save metadata
    sup.create_json(model_metadata, metadata_file)
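# Illustrative counterpart (assumed, not shown in the original source):
# reading back the artifacts that _save_model writes. Assumes the metadata
# was written as JSON and the scaler was pickled as above;
# 'load_model_artifacts' is a hypothetical helper name.
import json
from pickle import load


def load_model_artifacts(metadata_file):
    with open(metadata_file) as file:
        metadata = json.load(file)
    model_name = metadata_file.replace('_meta.json', '')
    scaler = None
    # The inter-case scaler only exists for the 'inter*' model types
    try:
        with open(model_name + '_inter_scaler.pkl', 'rb') as file:
            scaler = load(file)
    except FileNotFoundError:
        pass
    return metadata, scaler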