import os

import ipdb
import joblib
import numpy as np

# Project-local modules assumed by this snippet.
import model_generation
import model_score
import training_config
import util


def fit(X, y, class_names):
    '''Train an anomaly model for every class in class_names.'''
    for model_name in class_names:
        # Collect all trials labeled with this class.
        indices = [i for i, label in enumerate(y) if label == model_name]
        train_data = [X[i] for i in indices]
        model_list, lengths = [], []
        for i in range(len(train_data)):
            lengths.append(len(train_data[i]))
        try:
            train_data = np.concatenate(train_data)
        except ValueError:
            print 'Oops! Something went wrong while concatenating trials...'
            ipdb.set_trace()
        # BNPY's observation model is a first-order autoregressive Gaussian,
        # which consumes one sample, so the last sequence is shortened by one.
        lengths[-1] -= 1
        model_generator = model_generation.get_model_generator(
            training_config.model_type_chosen, training_config.model_config)
        for model, now_model_config in model_generator:
            model = model.fit(train_data, lengths=lengths)  # (n_samples, n_features)
            score = model_score.score(training_config.score_metric, model,
                                      train_data, lengths)
            if score is None:
                print "scorer says to skip this model, will do"
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print 'score:', score
            model_generation.update_now_score(score)
        sorted_model_list = sorted(model_list, key=lambda x: x['score'])
        best = sorted_model_list[0]
        # Note: the save path below uses the module-level model_id from
        # training_config; this per-model id is computed but unused here.
        model_id = util.get_model_config_id(best['now_model_config'])
        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path, model_name,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            training_config.model_id)
        if not os.path.isdir(anomaly_model_path):
            os.makedirs(anomaly_model_path)
        joblib.dump(best['model'],
                    os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
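# Hypothetical usage sketch for fit(): X is a list of per-trial
# (n_samples, n_features) arrays and y holds one anomaly label per trial.
# Names, shapes and labels below are illustrative, and the project-local
# training_config/model_generation/model_score modules must be importable.
import numpy as np

X = [np.random.rand(100, 7), np.random.rand(120, 7), np.random.rand(90, 7)]
y = ['tool_collision', 'tool_collision', 'human_collision']
fit(X, y, class_names=['tool_collision', 'human_collision'])
# The best-scoring model per class is dumped under
# training_config.anomaly_model_save_path/<class_name>/.../model_s1.pkl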
def train_hmm_model(train_data, lengths):
    '''Fit every candidate model config on train_data and return the best
    (lowest-scoring) model, its config id, and the full sorted candidate
    list so callers can build a training report.'''
    model_list = []
    # BNPY's observation model is a first-order autoregressive Gaussian,
    # which consumes one sample, so the last sequence is shortened by one.
    lengths[-1] -= 1
    model_generator = model_generation.get_model_generator(
        training_config.model_type_chosen, training_config.model_config)
    for model, now_model_config in model_generator:
        model = model.fit(train_data, lengths=lengths)  # (n_samples, n_features)
        score = model_score.score(training_config.score_metric, model,
                                  train_data, lengths)
        if score is None:
            print "scorer says to skip this model, will do"
            continue
        model_list.append({
            "model": model,
            "now_model_config": now_model_config,
            "score": score
        })
        print 'score:', score
        model_generation.update_now_score(score)
    sorted_model_list = sorted(model_list, key=lambda x: x['score'])
    best_model = sorted_model_list[0]
    model_id = util.get_model_config_id(best_model['now_model_config'])
    return best_model, model_id, sorted_model_list
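# Hypothetical usage of train_hmm_model(): concatenate per-trial arrays and
# record per-trial lengths, mirroring what fit() above does before training.
# Shapes and values are illustrative only.
import numpy as np

trials = [np.random.rand(100, 7), np.random.rand(110, 7)]
lengths = [t.shape[0] for t in trials]
train_data = np.concatenate(trials)
best_model, model_id, sorted_model_list = train_hmm_model(train_data, lengths)
print 'best score:', best_model['score'], 'config id:', model_id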
import json
import os

import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm

# Project-local module assumed by this snippet; train_hmm_model() is
# defined above.
import util


def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    trials_amount = len(trials_group_by_folder_name)
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    # Stack every trial's data per state and remember each trial's length.
    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    for state_no in range(1, state_amount + 1):
        train_data = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        best_model, model_id, sorted_model_list = train_hmm_model(
            train_data, lengths)

        joblib.dump(
            best_model['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best_model['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))
        # The score is recorded in the file name; the pickle payload is empty.
        joblib.dump(
            None,
            os.path.join(
                model_save_path,
                "model_s%s_score_%s.pkl" % (state_no, best_model['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)),
                      'w'),
                  separators=(',\n', ': '))

        # Plot the hidden state sequence for each state.
        print
        print 'Finished fitting the posterior model -> generating the hidden state sequence...'
        print
        model = best_model['model']
        if model_type in ['hmmlearn\'s HMM', 'hmmlearn\'s GMMHMM']:
            _, model.z = model.decode(train_data, algorithm="viterbi")
        elif model_type == 'BNPY\'s HMM':
            model.z = model.decode(train_data, lengths)
        elif model_type == 'PYHSMM\'s HMM':
            model.z = model.model.stateseqs[0]
        else:
            print 'Sorry, this model cannot provide the hidden state sequence'
            return

        # plt.close("all")
        # Xdf = pd.DataFrame(train_data)  # plot the original multimodal signals
        # Xdf.plot()
        # im_data = np.tile(model.z, 2)
        # cmap = cm.get_cmap('jet', np.max(model.z))
        # print np.unique(model.z)
        # ax.imshow(im_data[None], aspect='auto', interpolation='nearest',
        #           vmin=0, vmax=np.max(model.z), cmap=cmap, alpha=0.5)
        fig, ax = plt.subplots(nrows=1, ncols=1)
        trial_len = len(model.z) / trials_amount  # integer division (Python 2)
        color = iter(cm.rainbow(np.linspace(0, 1, trials_amount)))
        zhat = []
        for iTrial in range(trials_amount):
            zSeq = model.z[iTrial * trial_len:(iTrial + 1) * trial_len]
            ax.plot(zSeq, color=next(color))  # linewidth=2.0
            zhat.append(zSeq.tolist() + [zSeq[-1]])
        plt.title('The hidden state sequence of state_%d' % (state_no))
        plt.show()
        zdf = pd.DataFrame(zhat)
        zdf.to_csv(model_save_path + '/zhat.csv', index=False)
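# A minimal helper (a sketch, not project code) isolating the per-backend
# hidden-state decoding performed inside run() above; the hmmlearn, BNPY and
# PYHSMM wrapper APIs are assumed to match the calls used there.
def decode_hidden_states(model, model_type, train_data, lengths):
    if model_type in ["hmmlearn's HMM", "hmmlearn's GMMHMM"]:
        # hmmlearn's decode returns (log-likelihood, state sequence).
        _, z = model.decode(train_data, algorithm="viterbi")
    elif model_type == "BNPY's HMM":
        z = model.decode(train_data, lengths)
    elif model_type == "PYHSMM's HMM":
        z = model.model.stateseqs[0]
    else:
        z = None  # this backend cannot expose a hidden state sequence
    return z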
    # config for preprocessing
    'preprocessing_scaling': False,    # scaled data has zero mean and unit variance
    'preprocessing_normalize': False,  # normalize individual samples to unit "l1" or "l2" norm
    'norm_style': 'l2',
    'pca_components': 0,               # 0 disables the PCA step

    # threshold of the derivative used in HMM online anomaly detection
    'deri_threshold': 200,

    # c value used when training the threshold, threshold = mean - c * std
    'threshold_c_value': 5
}

model_config_set_name = model_store[config_by_user['model_type_chosen']]['use']
model_config = model_store[config_by_user['model_type_chosen']]['config_set'][
    model_config_set_name]

model_id = util.get_model_config_id(model_config)
model_id = config_by_user['score_metric'] + model_id

norm_style = config_by_user['norm_style']

success_path = os.path.join(config_by_user['dataset_path'], "success")
test_success_data_path = os.path.join(config_by_user['dataset_path'],
                                      "success_for_test")
model_save_path = os.path.join(config_by_user['base_path'], "model",
                               config_by_user['data_type_chosen'],
                               config_by_user['model_type_chosen'])
figure_save_path = os.path.join(config_by_user['base_path'], "figure",
                                config_by_user['data_type_chosen'],
                                config_by_user['model_type_chosen'], model_id)

# for anomaly analysis
anomaly_data_path = config_by_user['dataset_path']
anomaly_raw_data_path = os.path.join(config_by_user['dataset_path'], 'anomalies')
anomaly_model_save_path = os.path.join(config_by_user['base_path'],
                                       "anomaly_models",
                                       config_by_user['data_type_chosen'],
                                       config_by_user['model_type_chosen'])
anomaly_identification_figure_path = os.path.join(
    config_by_user['base_path'], "figure",
    config_by_user['data_type_chosen'],
    config_by_user['model_type_chosen'])

# Promote every config_by_user entry to a module-level variable
# (Python 2 exec statement).
exec '\n'.join("%s=%r" % i for i in config_by_user.items())
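# Illustrative only: what the exec statement above generates. Each
# config_by_user entry becomes an assignment like norm_style='l2', executed
# at module scope. The toy dict below is hypothetical.
def _demo_config_promotion():
    toy = {'norm_style': 'l2', 'deri_threshold': 200}
    source = '\n'.join("%s=%r" % kv for kv in toy.items())
    print source
    # prints (order may vary for a Python 2 dict):
    # norm_style='l2'
    # deri_threshold=200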
import json
import os

import joblib
import numpy as np

# Project-local modules assumed by this snippet.
import model_generation
import model_score
import util


def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    trials_amount = len(trials_group_by_folder_name)
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    # Stack every trial's data per state and remember each trial's length.
    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(
            model_type, model_config)
        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        # Adaptation for BNPY, whose observation model is a first-order
        # autoregressive Gaussian and consumes one sample.
        lengths[-1] -= 1

        for model, now_model_config in model_generator:
            print
            print '-' * 20
            print 'in state', state_no, ' working on config:', now_model_config
            model = model.fit(X, lengths=lengths)  # (n_samples, n_features)
            score = model_score.score(score_metric, model, X, lengths)
            if score is None:
                print "scorer says to skip this model, will do"
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print 'score:', score
            print '=' * 20
            print
            model_generation.update_now_score(score)

        sorted_model_list = sorted(model_list, key=lambda x: x['score'])
        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))
        # The score is recorded in the file name; the pickle payload is empty.
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)),
                      'w'),
                  separators=(',\n', ': '))
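# A sketch of the generator contract assumed by the loop above:
# model_generation.get_model_generator is expected to yield
# (unfitted_model, config_dict) pairs, e.g. a small grid over hmmlearn
# GaussianHMM hyperparameters. The config key name is hypothetical.
from hmmlearn.hmm import GaussianHMM

def toy_model_generator():
    for n_components in [3, 5, 7]:
        config = {'hmm_hidden_state_amount': n_components}
        yield GaussianHMM(n_components=n_components,
                          covariance_type='diag', n_iter=100), config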
import json
import os

import joblib

# Project-local modules assumed by this snippet.
import train_model
import util


def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name, test_trials_group_by_folder_name):
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_training_trial = trials_group_by_folder_name.values()

    test_trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        test_trials_group_by_folder_name)
    list_of_test_trial = test_trials_group_by_folder_name.values()

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_training_trial[0]
    state_amount = len(one_trial_data_group_by_state)

    # Group trials per state, keeping each trial as a separate matrix.
    training_data_group_by_state = {}
    test_data_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        training_data_group_by_state[state_no] = []
        test_data_group_by_state[state_no] = []
        for trial_no in range(len(list_of_training_trial)):
            training_data_group_by_state[state_no].append(
                list_of_training_trial[trial_no][state_no])
        for trial_no in range(len(list_of_test_trial)):
            test_data_group_by_state[state_no].append(
                list_of_test_trial[trial_no][state_no])

    for state_no in range(1, state_amount + 1):
        print 'state_no', state_no
        sorted_model_list = train_model.run(
            list_of_train_mat=training_data_group_by_state[state_no],
            list_of_test_mat=test_data_group_by_state[state_no],
            model_type=model_type,
            model_config=model_config,
            score_metric=score_metric,
        )
        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))
        # The score is recorded in the file name; the pickle payload is empty.
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)),
                      'w'),
                  separators=(',\n', ': '))
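# Hypothetical shape of the entries train_model.run is assumed to return,
# inferred from the dict keys consumed above; the list is sorted ascending
# by score, so index 0 is the best candidate. Values are placeholders.
example_entry = {
    'model': None,                 # the fitted model object
    'now_model_config': {'n': 5},  # this candidate's hyperparameter dict
    'score': 123.4,                # sort key; index 0 holds the lowest score
}
sorted_model_list = [example_entry]
best = sorted_model_list[0]  # run() saves this entry to disk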