def train_gmm(gmm_save_path, train_vectors, num_clusters, max_iter=500):
    """Fit several Gaussian mixtures and keep the one with the lowest BIC.

    One ``mixture.GaussianMixture`` is fitted for every combination of a
    covariance type taken from the module-level ``cov_types`` and a
    component count in ``range(2, num_clusters)`` (``num_clusters`` itself
    is excluded). The candidate with the smallest ``bic(train_vectors)`` is
    persisted to ``gmm_save_path`` via ``persist_object`` and returned.

    Args:
        gmm_save_path: Destination handed to ``persist_object``.
        train_vectors: Data passed to ``fit`` and ``bic`` of every candidate.
        num_clusters: Exclusive upper bound on the number of components.
        max_iter: Forwarded as ``max_iter`` to every candidate model.

    Returns:
        The fitted model with the lowest BIC. When no candidate is fitted
        (``num_clusters <= 2`` or ``cov_types`` is empty) the placeholder
        ``{}`` is persisted and returned, as before.
    """
    # np.inf instead of np.infty: the np.infty alias was removed in NumPy 2.0.
    lowest_bic = np.inf
    best_component = ''
    best_cv = ''
    best_gmm = {}
    n_components_range = range(2, num_clusters)

    for cv_type in cov_types:
        for n_components in n_components_range:
            # Fit a mixture of Gaussians with EM.
            gmm = mixture.GaussianMixture(n_components=n_components,
                                          covariance_type=cv_type,
                                          max_iter=max_iter)
            gmm.fit(train_vectors)
            current_bic = gmm.bic(train_vectors)
            # Strict comparison: on ties the earliest candidate is kept.
            if current_bic < lowest_bic:
                lowest_bic = current_bic
                best_component = n_components
                best_cv = cv_type
                best_gmm = gmm

    print("best n_component {}".format(best_component))
    print("best gmm type {}".format(best_cv))
    persist_object(best_gmm, gmm_save_path)
    return best_gmm
def train_data(self, preprocessed_data):
    """Derive model statistics from already-preprocessed data and persist them.

    Stores the mean and standard deviation of ``preprocessed_data`` along
    axis 0 on the instance, calls ``self.gen_gaussians()``, writes the
    parameters to ``model_params.pkl`` under ``self.model_path`` and marks
    the instance as loaded.

    Args:
        preprocessed_data: Array-like accepted by ``np.mean`` / ``np.std``.
    """
    destination = os.path.join(self.model_path, "model_params.pkl")

    # Statistics are reduced over axis 0 of the input.
    self.means = np.mean(preprocessed_data, axis=0)
    self.stds = np.std(preprocessed_data, axis=0)
    self.gen_gaussians()

    snapshot = {
        'freqs': self.freqs,
        'means': self.means,
        'stds': self.stds,
        'gaussians': self.gaussians,
    }
    persist_object(snapshot, destination)
    self.loaded = True
def preprocess_train(self, iq_data, sample_rate):
    """Compute a spectrogram from IQ data, derive statistics and persist them.

    Calls ``iq2fft`` with ``self.rbw`` and ``mode=['real', 'imag']``, stacks
    the returned list along a new last axis, stores the mean and standard
    deviation along axis 0 on the instance, calls ``self.gen_gaussians()``,
    writes the parameters to ``model_params.pkl`` under ``self.model_path``
    and marks the instance as loaded.

    Args:
        iq_data: IQ samples forwarded to ``iq2fft``.
        sample_rate: Sample rate forwarded to ``iq2fft``.
    """
    destination = os.path.join(self.model_path, "model_params.pkl")

    # Getting the spectrogram; the second return value is not needed here.
    self.freqs, _, components = iq2fft(iq_data, sample_rate, self.rbw,
                                       mode=['real', 'imag'])
    stacked = np.stack(components, axis=-1)

    self.means = np.mean(stacked, axis=0)
    self.stds = np.std(stacked, axis=0)
    self.gen_gaussians()

    snapshot = {
        'freqs': self.freqs,
        'means': self.means,
        'stds': self.stds,
        'gaussians': self.gaussians,
    }
    persist_object(snapshot, destination)
    self.loaded = True
def save_roc_plot(iq_normal, sample_rate, dBs, num_ROC_samples=500,
                  score_method=None, score_name='normal'):
    """Score anomalous and normal blocks per dB level and save ROC/AUC plots.

    For every entry of ``dBs`` the function takes ``num_samples`` blocks that
    get a ``CW`` anomaly injected and ``num_samples`` untouched blocks, scores
    each one, and computes the ROC curve and AUC. Block start indices are
    loaded from ``roc_start_indices.pkl`` under ``model_root`` when that file
    exists (its length then overrides ``num_ROC_samples``); otherwise they are
    drawn at random and persisted there. A few example prediction figures,
    the combined ROC figure, the dB-vs-AUC figure and a pickle with the AUC
    values are written under ``plots_path``.

    Relies on the module-level names ``model``, ``model_root``, ``plots_path``,
    ``sweep_params`` and ``anomal_freq``.

    Args:
        iq_normal: Normal IQ recording; blocks are trimmed from it.
        sample_rate: Sample rate forwarded to the helpers and the scorer.
        dBs: Sequence of anomaly levels in dB.
        num_ROC_samples: Number of blocks per class when no index file exists.
        score_method: Optional callable ``(basic_iq, sample_rate) -> score``;
            when falsy, ``model.predict_basic_block_score`` is used.
        score_name: Suffix used in the output file names.
    """

    def _save_prediction_figure(iq_block, title_tail, file_stem):
        # Plot the model prediction for one block, title it and store it.
        model.plot_prediction(iq_block, sample_rate)
        fig = plt.gcf()
        fig.suptitle('using model "' + model.name + title_tail)
        fig.set_size_inches(12, 8, forward=True)
        save_fig(fig, os.path.join(plots_path, file_stem))
        plt.close()

    fpr_curves, tpr_curves, auc_values = [], [], []
    normal_plots_done = 0

    for db_index, isr_db in enumerate(dBs):
        gc.collect()
        print('creating anomaly with {}dB...'.format(isr_db))
        sweep_params['dB'] = isr_db

        block_len = get_basic_block_len(sample_rate)
        starts_file = os.path.join(model_root, 'roc_start_indices.pkl')
        per_class = num_ROC_samples

        if not os.path.isfile(starts_file):
            # No stored indices yet: draw them and persist for later runs.
            upper = iq_normal.shape[0] - block_len
            anomaly_starts = np.random.randint(0, upper, (per_class, ))
            clean_starts = np.random.randint(0, upper, (per_class, ))
            persist_object([anomaly_starts, clean_starts], starts_file)
        else:
            anomaly_starts, clean_starts = load_object(starts_file)
            if len(anomaly_starts) != per_class:
                print(
                    'wrong length of roc_start_indices.pkl...\nchanging to roc_start_indices.pkl len'
                )
                per_class = len(anomaly_starts)

        all_starts = np.concatenate([anomaly_starts, clean_starts])
        # First half is labelled anomalous, second half normal.
        y_true = np.concatenate(
            [np.ones((per_class, )), np.zeros((per_class, ))]).astype(bool)
        y_score = np.zeros((2 * per_class, ))

        for i in range(2 * per_class):
            block = trim_iq_basic_block(iq_normal, sample_rate,
                                        start=all_starts[i])
            if y_true[i]:
                block = CW(block, sample_rate, anomal_freq, isr_db)
                # Only the first two anomalous blocks per dB get a figure.
                if i < 2:
                    _save_prediction_figure(
                        block,
                        '" on sweep with ISR: ' + str(isr_db) + 'dB',
                        '{0:02d}_ISR_dB_{1}_sample_{2}'.format(
                            db_index, isr_db, i))
            elif normal_plots_done < 2:
                # At most two normal figures over the whole run.
                _save_prediction_figure(
                    block,
                    '" on normal data',
                    'normal_sample_{0}'.format(normal_plots_done))
                normal_plots_done += 1

            scorer = (score_method if score_method
                      else model.predict_basic_block_score)
            y_score[i] = scorer(block, sample_rate)

        fpr, tpr, _ = roc_curve(y_true, y_score)
        fpr_curves.append(fpr)
        tpr_curves.append(tpr)
        auc_values.append(roc_auc_score(y_true, y_score))

    # Plotting: ROC curves are drawn from the last dB entry to the first.
    roc_fig = plt.figure(0)
    descending = range(len(dBs) - 1, -1, -1)
    for j in descending:
        plt.plot(fpr_curves[j], tpr_curves[j])
    plt.legend([
        'anomaly in {}dB, auc: {:.3f}'.format(dBs[j], auc_values[j])
        for j in descending
    ])
    plt.xlabel('False Positive Rate', fontsize=18)
    plt.ylabel('True Positive Rate', fontsize=18)
    plt.title('ROC for anomalies with different ISR [dB]', fontsize=20)
    plt.gca().grid(True)
    roc_fig.set_size_inches(8, 6.5, forward=True)
    save_fig(roc_fig, os.path.join(plots_path, 'All_ROCs_' + score_name))
    plt.close()

    persist_object(
        {
            'dBs': dBs,
            'aucs': auc_values,
            'name': model.name + '_score_' + score_name
        },
        os.path.join(plots_path, 'roc_score_' + score_name + '.pkl'))

    auc_fig = plt.figure(1)
    plt.plot(dBs, auc_values, '-o')
    plt.ylim([0, 1])
    plt.xlabel('ISR in dB of anomaly', fontsize=18)
    plt.ylabel('AUC score', fontsize=18)
    plt.title(
        'Sweep anomaly\nArea Under Curve as function of\nInterference Signal Ratio',
        fontsize=20)
    plt.gca().grid(True)
    auc_fig.set_size_inches(8, 6.5, forward=True)
    save_fig(auc_fig, os.path.join(plots_path, 'dB_vs_AUC_' + score_name))
    plt.close()
def scale_error_vectors(errors, weights_dir):
    """Scale training error vectors and persist the scaled result.

    Delegates the scaling to ``scale_train_vectors``, which is given the
    path ``error_train_scaler.pkl`` inside ``weights_dir``, then stores the
    scaled errors as ``train_errors.pkl`` in the same directory.

    Args:
        errors: Error vectors to scale.
        weights_dir: Directory that holds both pickle files.

    Returns:
        Tuple ``(scaled_errors, error_scaler)`` as produced by
        ``scale_train_vectors``.
    """
    scaler_file = os.path.join(weights_dir, "error_train_scaler.pkl")
    errors_file = os.path.join(weights_dir, "train_errors.pkl")

    scaled, scaler = scale_train_vectors(errors, scaler_file)
    persist_object(scaled, errors_file)
    return scaled, scaler
# # Model training weights_save_path = namespace.weights_path (X_train, Y_train, X_val, Y_val) = split_train_validation(X_train, Y_train,validation_split) train_model(model_obj, X_train, Y_train, X_val, Y_val,validation_split) #Predict errors train_errors = predict_rnn_error_vectors(X_train, Y_train, model_obj, batch_size) val_errors = predict_rnn_error_vectors(X_val, Y_val, model_obj) # # Scale errors (scaled_train_errors, error_scaler) = scale_error_vectors(train_errors, weights_dir) scaled_val_errors = error_scaler.transform(val_errors) persist_object(scaled_val_errors, val_errors_path) #GMM training gmm = train_gmm(gmm_save_path,scaled_train_errors,num_clusters) train_scores = (gmm.score_samples(scaled_train_errors)) persist_object(train_scores, train_scores_path) val_scores = (gmm.score_samples(scaled_val_errors)) persist_object(val_scores, val_scores_path) val_emds = compute_emd_split_samples(val_scores, train_scores) persist_object(val_emds, val_emds_path) else: test_data = load_iq_test_data(data_dir,weights_dir)
def save_model(self):
    """Persist the cepstrum maximum and training means under ``self.model_path``.

    Writes ``self.cepstrum_max`` to ``cepstrum_max.pkl`` and
    ``self.cepstrum_means`` to ``cepstrum_train_means.pkl``.
    """
    folder = self.model_path
    # Both target paths are resolved before anything is written.
    max_file = os.path.join(folder, "cepstrum_max.pkl")
    means_file = os.path.join(folder, "cepstrum_train_means.pkl")

    persist_object(self.cepstrum_max, max_file)
    persist_object(self.cepstrum_means, means_file)