def label_data(model_path, train_csv_file, train_audio_dir):
    """Compute one softmax vector per training file with a saved model.

    Every file named in the first column of ``train_csv_file`` is
    preprocessed from ``train_audio_dir`` and run through the model restored
    from ``model_path``. When the model yields more than one probability row
    for a file, the rows are averaged into a single vector.

    Args:
        model_path: Checkpoint prefix; ``model_path + '.meta'`` is handed to
            ``CleverHansModel`` and ``model_path`` itself to ``saver.restore``.
        train_csv_file: CSV whose first column holds the audio file names.
        train_audio_dir: Directory passed to ``utils_tf._preprocess_data``.

    Returns:
        A ``(number of files, 41)`` array; row ``i`` holds the probabilities
        for the ``i``-th file name. Nothing is written to disk.
    """
    sr = 32000
    num_classes = 41

    df = pd.read_csv(train_csv_file)
    # Output of fitting the scaler on the configured 'training' dataset; it is
    # handed to the model below.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    file_names = df.iloc[:, 0].values
    print(file_names)

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=sr, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(model_path + '.meta', sr, generator, mel_filt)
        saver = model.build_graph(pcm)

    softmax = np.zeros((len(file_names), num_classes))
    print(softmax.shape)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        print(len(file_names))
        for i, file_name in enumerate(file_names):
            data, _ = utils_tf._preprocess_data(train_audio_dir, file_name)
            probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            probs = np.squeeze(probs)
            # More than one row of probabilities for a file: average them.
            if probs.ndim != 1:
                probs = np.mean(probs, axis=0)
            softmax[i, :] = probs
            print(i)

    # The original discarded the matrix (its save calls were commented out);
    # returning it makes the computed labels usable by the caller.
    return softmax
def target():
    """Print the model's top-1 accuracy on the first rows of the inference CSV.

    Reads the command-line flags, restores the model stored at
    ``flags.save_model_dir`` and classifies at most the first 100 files listed
    in ``flags.infer_csv_file`` (file name in column 0, label name in
    column 1). For each file the true and predicted label indices are
    printed, followed by the fraction of correct predictions.

    Returns:
        None. Results are only printed.
    """
    max_files = 100
    sr = 32000

    flags = parse_flags()
    # NOTE(review): the parsed hparams are not used here; the call is kept in
    # case parsing validates the flag string - confirm and drop if not.
    parse_hparams(flags.hparams)

    df = pd.read_csv(flags.infer_csv_file)
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    file_names = df.iloc[:, 0].values

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=sr, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(flags.save_model_dir + '.meta', sr, generator,
                                mel_filt)
        saver = model.build_graph(pcm)

    # Never index past the end of the CSV when it has fewer than 100 rows.
    n_eval = min(max_files, len(file_names))
    count = 0

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, flags.save_model_dir)
        print(len(file_names))
        for i in range(n_eval):
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_names[i])
            probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            probs = np.squeeze(probs)
            if probs.ndim != 1:
                probs = np.mean(probs, axis=0)
            predicted = np.argmax(probs)
            lab = utils_tf._convert_label_name_to_label(df.iloc[i, 1])
            if lab == predicted:
                count += 1
            print(lab, predicted)

    # Divide by the number of files actually evaluated, not a fixed 100.
    if n_eval:
        print(count / n_eval)
def deepfoolcochlear(audio_path, metadata_path, model_path, exp_data_path,
                     adv_audio_path, save_data=False):
    """Run the DeepFool attack against the Cochlear model.

    File names are read from column 1 and label names from column 2 of the
    metadata CSV. For each processed file the clean prediction, the
    adversarial prediction and the attack time are printed.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 1, label names in column 2.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file the result rows are appended to when
            ``save_data`` is true.
        adv_audio_path: Prefix for the adversarial wav files; the output name
            is ``adv_audio_path + 'adv-' + <file name>``.
        save_data: When true, write the adversarial audio and the result CSV.

    Returns:
        None.
    """
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32, shape=[None, None], name='input_audio')
        saver = model.build_graph(pcm)
        deepfool = DFM.DeepFool(model)
        deepfool.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        # NOTE(review): only row index 1 is processed, unlike the sibling
        # attack functions that walk every row - looks like a debugging
        # leftover; confirm before widening the range.
        for i in range(1, 2):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would reuse the
                # previous file's audio (or hit an unbound name on the first
                # iteration).
                print("EOF Error")
                continue
            data = np.expand_dims(data, axis=0)

            labels = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})
            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            print('Ground truth:', labels)
            print('Original label number:', np.argmax(s))
            print('Original label confidence:', np.max(s))

            tic = time.process_time()
            adv = deepfool.attack(sess, data, 1)
            toc = time.process_time()
            print('Time for processing sample:', toc - tic, 'for iteration:', i)

            preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
            preds = np.squeeze(preds)
            if preds.ndim != 1:
                preds = np.mean(preds, axis=0)
            print('New label number:', np.argmax(preds))
            print('New label confidence:', np.max(preds))

            adv = np.squeeze(adv)
            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv, sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(np.argmax(s))
            original_confidence.append(np.max(s))
            new_label.append(np.argmax(preds))
            new_confidence.append(np.max(preds))
            # Adversarial confidence assigned to the originally predicted class.
            new_o_label_conf.append(preds[np.argmax(s)])
            # Signal-to-noise ratio in dB: clean power over perturbation power.
            snr.append(
                10 * np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        df_deepfool = pd.DataFrame({
            'audio_name': audio_name,
            'audio_length': audio_length,
            'original_label': original_label,
            'original_confidence': original_confidence,
            'new_label': new_label,
            'new_confidence': new_confidence,
            'new_orig_conf': new_o_label_conf,
            'SNR': snr,
        })
        # Append mode without a header: repeated runs accumulate rows.
        with open(exp_data_path, 'a') as f:
            df_deepfool.to_csv(f, header=False)
def lbfgstargeted(audio_path, metadata_path, model_path, exp_data_path,
                  adv_audio_path, save_data=False):
    """Run the targeted L-BFGS attack against the CleverHans-wrapped model.

    For every row of the metadata CSV (file name in column 1, ground-truth
    label in column 2) the clean prediction is printed, then one attack is
    run per target class in a fixed list of six instrument labels, skipping
    a target that equals the ground-truth label.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 1, labels in column 2.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file the result rows are appended to when
            ``save_data`` is true.
        adv_audio_path: Prefix for the adversarial wav files; the output name
            is ``adv_audio_path + 'adv-' + <target name> + '-' + <file name>``.
        save_data: When true, write the adversarial audio and the result CSV.

    Returns:
        None.
    """
    # Output of fitting the scaler on the configured 'training' dataset; it is
    # handed to the model below.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000
    mel_fb = librosa.filters.mel(sr=sample_rate, n_fft=1024, n_mels=64).T
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)
        lbfgs = LB.LBFGS(model, binary_search_steps=2, max_iterations=200)
        lbfgs.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would attack the
                # previous file's audio under this file's name.
                print("EOF Error")
                continue

            # NOTE(review): compared below against the integer-converted
            # target label; assumes column 2 already holds label indices -
            # confirm against the metadata file.
            gt_label = gt_labels[i]
            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})
            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            print('Original label number:', np.argmax(s), 'GT:', gt_label)
            print('Original label confidence:', np.max(s))

            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                if label == gt_label:
                    continue

                tic = time.process_time()
                adv = lbfgs.attack(sess, data, np.repeat(label, int(q)))
                toc = time.process_time()
                print('Time for processing sample:', toc - tic,
                      'for iteration:', i)

                preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
                preds = np.squeeze(preds)
                if preds.ndim != 1:
                    preds = np.mean(preds, axis=0)
                print('New label number:', np.argmax(preds))
                print('New label confidence:', np.max(preds))

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(np.argmax(s))
                original_confidence.append(np.max(s))
                new_label.append(np.argmax(preds))
                new_confidence.append(np.max(preds))
                # Adversarial confidence of the originally predicted class.
                new_o_label_conf.append(preds[np.argmax(s)])
                # Signal-to-noise ratio in dB.
                snr.append(
                    10 * np.log10(
                        np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        results = pd.DataFrame({
            'audio_name': audio_name,
            'audio_length': audio_length,
            'original_label': original_label,
            'original_confidence': original_confidence,
            'new_label': new_label,
            'new_confidence': new_confidence,
            'new_orig_conf': new_o_label_conf,
            'SNR': snr,
        })
        # Append mode without a header: repeated runs accumulate rows.
        with open(exp_data_path, 'a') as f:
            results.to_csv(f, header=False)
def inferencecochlear(audio_path, metadata_path, model_path, exp_data_path,
                      adv_audio_path, save_data=False):
    """Classify every file in the metadata CSV with the Cochlear model.

    File names come from column 0 and label names from column 1. The running
    accuracy is printed every 1000 rows. No attack is performed.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 0, label names in column 1.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file overwritten with the per-file results when
            ``save_data`` is true.
        adv_audio_path: Unused; kept so the signature matches the attack
            functions.
        save_data: When true, write the result CSV.

    Returns:
        None.
    """
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values

    audio_name = []
    inferred_label = []
    inferred_confidence = []
    ground_truth = []

    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32, shape=[None, None], name='input_audio')
        saver = model.build_graph(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, _ = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would score the
                # previous file's audio against this file's label.
                print("EOF Error")
                continue
            data = np.expand_dims(data, axis=0)

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})
            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            predicted = np.argmax(s)

            count_tot += 1
            if gt_label == predicted:
                count += 1
            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))

            audio_name.append(audio_file_name)
            inferred_label.append(predicted)
            inferred_confidence.append(np.max(s))
            ground_truth.append(gt_label)

    if save_data:
        df_infer = pd.DataFrame({
            'audio_name': audio_name,
            'ground_truth': ground_truth,
            'inferred_label': inferred_label,
            'inferred_confidence': inferred_confidence,
        })
        with open(exp_data_path, 'w') as f:
            df_infer.to_csv(f, header=False)
def inferenceiqbal(audio_path, metadata_path, model_path, exp_data_path,
                   adv_audio_path, save_data=False):
    """Classify every file in the metadata CSV with the CleverHans-wrapped model.

    File names come from column 0 and label names from column 1. The running
    accuracy is printed every 1000 rows. No attack is performed.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 0, label names in column 1.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file overwritten with the per-file results when
            ``save_data`` is true.
        adv_audio_path: Unused; kept so the signature matches the attack
            functions.
        save_data: When true, write the result CSV.

    Returns:
        None.
    """
    # Output of fitting the scaler on the configured 'training' dataset; it is
    # handed to the model below.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values
    sample_rate = 32000
    mel_fb = librosa.filters.mel(sr=sample_rate, n_fft=1024, n_mels=64).T

    audio_name = []
    ground_truth = []
    inferred_label = []
    inferred_confidence = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, _ = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would score the
                # previous file's audio against this file's label.
                print("EOF Error")
                continue

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})
            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            label = np.argmax(s)

            count_tot += 1
            if label == gt_label:
                count += 1
            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))

            audio_name.append(audio_file_name)
            ground_truth.append(gt_label)
            inferred_label.append(label)
            inferred_confidence.append(np.max(s))

    if save_data:
        df_infer = pd.DataFrame({
            'audio_name': audio_name,
            'ground_truth': ground_truth,
            'inferred_label': inferred_label,
            'inferred_confidence': inferred_confidence,
        })
        with open(exp_data_path, 'w') as f:
            df_infer.to_csv(f, header=False)
def carliniwagneruntargetedcochlear(audio_path, metadata_path, model_path,
                                    exp_data_path, adv_audio_path,
                                    save_data=False):
    """Run the untargeted Carlini-Wagner attack against the Cochlear model.

    Every row of the metadata CSV (file name in column 1, label name in
    column 2) is attacked once, with the ground-truth label passed to the
    attack. The per-file attack time is printed.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 1, label names in column 2.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file overwritten with the result rows when
            ``save_data`` is true.
        adv_audio_path: Prefix for the adversarial wav files; the output name
            is ``adv_audio_path + 'adv-' + <file name>``.
        save_data: When true, write the adversarial audio and the result CSV.

    Returns:
        None.
    """
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32, shape=[None, None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               initial_const=1e-2,
                                               max_iterations=1000,
                                               confidence=500,
                                               binary_search_steps=2)
        # Here the saver comes from the attack object, not from the model.
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would attack the
                # previous file's audio under this file's name.
                print("EOF Error")
                continue
            data = np.expand_dims(data, axis=0)

            label = utils_tf._convert_label_name_to_label(label_names[i])
            print('Ground truth label:', label_names[i], label)

            tic = time.process_time()
            adv, o_label, o_conf, n_label, n_conf, n_conf_gt = (
                carliniwagner.attack(sess, data, label, label, 1,
                                     prob_thresh=0.0244))
            toc = time.process_time()
            print('Time for iteration:', i, 'is', toc - tic)

            adv = np.squeeze(adv)
            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv, sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(o_label)
            original_confidence.append(o_conf)
            new_label.append(n_label)
            new_confidence.append(n_conf)
            new_o_label_conf.append(n_conf_gt)
            # Signal-to-noise ratio in dB: clean power over perturbation power.
            snr.append(
                10 * np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        df_cw = pd.DataFrame({
            'audio_name': audio_name,
            'audio_length': audio_length,
            'original_label': original_label,
            'original_confidence': original_confidence,
            'new_label': new_label,
            'new_confidence': new_confidence,
            'new_orig_conf': new_o_label_conf,
            'SNR': snr,
        })
        with open(exp_data_path, 'w') as f:
            df_cw.to_csv(f, header=False)
def carliniwagnertargeted(audio_path, metadata_path, model_path, exp_data_path,
                          adv_audio_path, save_data=False):
    """Run the targeted Carlini-Wagner attack against the CleverHans-wrapped model.

    For every row of the metadata CSV (file name in column 1, ground-truth
    label in column 2) one attack is run per target class in a fixed list of
    six instrument labels, skipping a target that equals the ground-truth
    label.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV with file names in column 1, labels in column 2.
        model_path: Checkpoint prefix (``model_path + '.meta'`` is the graph).
        exp_data_path: CSV file the result rows are appended to when
            ``save_data`` is true.
        adv_audio_path: Prefix for the adversarial wav files; the output name
            is ``adv_audio_path + 'adv-' + <target name> + '-' + <file name>``.
        save_data: When true, write the adversarial audio and the result CSV.

    Returns:
        None.
    """
    # Output of fitting the scaler on the configured 'training' dataset; it is
    # handed to the model below.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000
    mel_fb = librosa.filters.mel(sr=sample_rate, n_fft=1024, n_mels=64).T
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               confidence=500,
                                               targeted=True,
                                               max_iterations=1000,
                                               binary_search_steps=2)
        # Here the saver comes from the attack object, not from the model.
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                # Skip the unreadable file. Falling through would attack the
                # previous file's audio under this file's name.
                print("EOF Error")
                continue

            # NOTE(review): compared below against the integer-converted
            # target label; assumes column 2 already holds label indices -
            # confirm against the metadata file.
            gt_label = gt_labels[i]
            print('Ground truth label:', gt_label, 'Audio_file:',
                  audio_file_name)

            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                if label == gt_label:
                    continue

                # The last returned value (confidence on the original label)
                # is not part of this function's CSV output, so it is dropped.
                adv, o_label, o_conf, n_label, n_conf, _ = (
                    carliniwagner.attack(sess,
                                         data,
                                         label,
                                         np.repeat(label, int(q)),
                                         int(q),
                                         prob_thresh=0.975))

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(o_label)
                original_confidence.append(o_conf)
                new_label.append(n_label)
                new_confidence.append(n_conf)
                # Noise power is the mean of the squared perturbation. The
                # previous form squared the mean, which is close to zero for a
                # zero-centred perturbation and inflated the reported SNR.
                snr.append(
                    10 * np.log10(
                        np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        columns = [
            'audio_name', 'audio_length', 'original_label',
            'original_confidence', 'new_label', 'new_confidence', 'SNR'
        ]
        df_cw = pd.DataFrame(
            {
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'SNR': snr,
            },
            columns=columns)
        # NOTE(review): append mode with the default header, so every run adds
        # another header row - the sibling functions pass header=False.
        with open(exp_data_path, 'a') as f:
            df_cw.to_csv(f)
def main():
    """Attack every file in the inference CSV with a substitute model.

    Flags supply the hyper-parameters, the CSV (file name in column 0, label
    name in column 1), the audio directories and the checkpoint directory.
    The substitute model is ``model.vgg13`` when ``hparams.vgg13_features``
    is set and ``model.BaselineCNN`` otherwise. When ``hparams.targeted`` is
    set, a random target class different from the true label is drawn for
    each file; otherwise the true label is passed to the attack.

    Successful attacks are written as wav files into
    ``flags.write_audio_dir`` and summarised in ``adv_data_vgg13.csv``.

    Returns:
        None.
    """
    flags = parse_flags()
    hparams = parse_hparams(flags.hparams)
    print(hparams)
    num_classes = 41

    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values
    labels = df.iloc[:, 1].values

    gt = []
    orig_label = []
    adv_label = []
    orig_pred = []
    adv_pred = []
    snr = []
    files = []
    t = []

    # Two context managers, separated by a comma. The previous
    # ``graph.as_default() and tf.Session()`` evaluated to the session alone,
    # so the freshly created graph was never made the default one.
    with tf.Graph().as_default(), tf.Session() as sess:
        if hparams.vgg13_features:
            substitute_model = model.vgg13(hparams, num_classes)
        else:
            substitute_model = model.BaselineCNN(hparams, num_classes)

        cw = CW.CarliniWagnerAttack(model=substitute_model,
                                    save_model_dir=flags.save_model_dir,
                                    sess=sess,
                                    hparams=hparams)
        cw.build_attack()

        for i, file_name in enumerate(file_names):
            start_time = time.time()
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_name)
            lab = utils_tf._convert_label_name_to_label(labels[i])
            print(data.shape)

            if hparams.targeted:
                # Redraw until the target differs from the true label.
                target_label = np.random.randint(num_classes)
                while target_label == lab:
                    target_label = np.random.randint(num_classes)
            else:
                target_label = lab

            audio, pred, pred_orig, noise = cw.attack(data, target_label)
            if audio is None:
                # The attack returned no audio for this file; nothing to log.
                continue

            print('TIME IN SECONDS!', time.time() - start_time)
            gt.append(lab)
            orig_label.append(np.argmax(pred_orig))
            orig_pred.append(np.max(pred_orig))
            adv_label.append(np.argmax(pred))
            adv_pred.append(np.max(pred))
            snr.append(noise)
            files.append(file_name)
            t.append(time.time() - start_time)
            # NOTE(review): the wav is written at a fixed 44100 Hz while the
            # hyper-parameters are passed to the attack separately - confirm
            # this matches the rate of ``audio``.
            wav.write(os.path.join(flags.write_audio_dir, file_name), 44100,
                      audio)

    df_out = pd.DataFrame({
        'fname': files,
        'gt': gt,
        'original_label': orig_label,
        'original_pred': orig_pred,
        'adv_label': adv_label,
        'adv_pred': adv_pred,
        'snr': snr,
        'time': t,
    })
    df_out.to_csv('adv_data_vgg13.csv', index=False)