for t in targets: # search for adversary X_adv, P_adv = find_adversary( model=dnn_model, X0=X0, label=t, P0=Phs, mu=mu, epsilon=epsilon, maxits=maxits, stop_thresh=stop_thresh, griffin_lim=True) # get time-domain representation x_adv = overlap_add( np.hstack((X_adv, X_adv[:,-2:-nfft/2-1:-1])) * np.exp(1j*P_adv)) out_snr = 20*np.log10(np.linalg.norm(x[nfft:-nfft]) / np.linalg.norm(x[nfft:-nfft]-x_adv[nfft:-nfft])) # BEFORE FILTERING # =========================================== # dnn prediction pred = np.argmax(np.sum(fprop(X_adv), axis=0)) if pred == t: dnn_file.write('{}\t'.format(int(out_snr+.5))) else: dnn_file.write('{}\t'.format('na')) # aux prediction X_adv_agg = aggregate_features(dnn_model, X_adv, which_layers) pred = np.argmax(np.bincount(np.array(aux_model.predict(X_adv_agg), dtype='int'))) if pred == t:
def file_misclass_error_printf(dnn_model,
                               root_dir,
                               dataset,
                               save_file,
                               mode='all_same',
                               label=0,
                               snr=30,
                               aux_model=None,
                               aux_save_file=None,
                               which_layers=None,
                               save_adversary_audio=None,
                               fwd_xform=None,
                               back_xform=None):
    """
    Run a targeted adversarial search on every item of the dataset's
    sequential iterator and record the file-level classification result.

    For each item the audio file is re-read from disk, its magnitude/phase
    spectrogram is computed with ``compute_fft``, ``find_adversary`` is run
    towards a target class chosen according to ``mode``, and the adversarial
    spectrogram is classified by averaging the network outputs over frames
    and taking the arg-max. One tab-separated row
    ``[file, true_label, dnn_label, out_snr, confidence]`` is written to
    ``save_file`` per item.

    Parameters
    ----------
    dnn_model : model exposing ``get_input_space()``, ``get_output_space()``
        and ``fprop()``.
    root_dir : directory that is joined with the per-item file path.
    dataset : object exposing ``targets``, ``file_list``, ``nfft`` and
        ``iterator(...)``. Each iterated element is indexed as
        ``el[1]`` (label) and ``el[2]`` (file path relative to ``root_dir``).
    save_file : path of the tab-separated output file for the DNN results.
    mode : how the adversarial target is chosen: ``'all_same'`` (use
        ``label``), ``'perfect'`` (use the true label), ``'random'``
        (uniform over classes) or ``'all_wrong'`` (uniform over classes
        other than the true label).
    label : target class used when ``mode == 'all_same'``.
    snr : value in dB used to derive the perturbation bound
        ``epsilon = ||Mag|| / n_rows / 10**(snr / 20)``.
    aux_model : optional model exposing ``predict()``. When given, its
        predictions on ``aggregate_features(...)`` are majority-voted and
        written to ``aux_save_file``.
    aux_save_file : path of the tab-separated output file for the aux
        results; required when ``aux_model`` is used.
    which_layers : forwarded unchanged to ``aggregate_features``.
    save_adversary_audio : if truthy, directory into which the
        re-synthesised adversarial audio is written as 22050 Hz pcm16 wav.
    fwd_xform, back_xform : callables forwarded to ``find_adversary``;
        ``fwd_xform`` is also applied to the stacked frames before the
        forward pass for ``Conv2DSpace`` inputs. Default to identity.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values, if ``aux_model`` is
        given without ``aux_save_file``, or if a file has an extension other
        than ``.wav``, ``.au`` or ``.mp3``.
    """
    import sys  # function-scope import: only the progress indicator needs it

    # Validate up front so that a bad argument does not truncate the output
    # files before failing with an unrelated NameError inside the loop.
    if mode not in ('all_same', 'perfect', 'random', 'all_wrong'):
        raise ValueError(
            "Unknown mode '{}'; expected 'all_same', 'perfect', 'random' "
            "or 'all_wrong'".format(mode))
    if aux_model and aux_save_file is None:
        raise ValueError('aux_save_file must be given when aux_model is used')

    if fwd_xform is None:
        print('fwd_xform=None, using identity')
        fwd_xform = lambda X: X
    if back_xform is None:
        print('back_xform=None, using identity')
        back_xform = lambda X: X

    n_classes = len(dataset.targets)

    # Compile the network's forward pass once.
    input_space = dnn_model.get_input_space()
    X = input_space.make_theano_batch()
    Y = dnn_model.fprop(X)
    fprop_theano = theano.function([X], Y)

    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))

        def fprop(batch):
            # Cut the rows of `batch` into windows of `tframes` rows, flatten
            # each window to one vector, and push them through the network.
            nframes = batch.shape[0]
            thop = 1.
            hop = int(tframes / thop)  # plain int(); np.int no longer exists
            sup = np.arange(0, nframes - tframes + 1, hop)
            data = np.vstack([
                np.reshape(batch[i:i + tframes, :], (tframes * dim, ))
                for i in sup
            ])
            data = fwd_xform(data)
            return fprop_theano(
                view_converter.get_formatted_batch(data, input_space))
    else:
        # NOTE(review): fwd_xform is not applied on this path although it is
        # on the Conv2DSpace path - confirm this is intended.
        fprop = fprop_theano

    n_examples = len(dataset.file_list)
    target_space = dnn_model.get_output_space()
    data_specs = (CompositeSpace((input_space, target_space)),
                  ("songlevel-features", "targets"))
    iterator = dataset.iterator(mode='sequential',
                                batch_size=1,
                                data_specs=data_specs)

    aux_fname = open(aux_save_file, 'w') if aux_model else None
    try:
        aux_writer = None
        if aux_fname is not None:
            aux_writer = csv.writer(aux_fname, delimiter='\t')

        with open(save_file, 'w') as fname:
            dnn_writer = csv.writer(fname, delimiter='\t')

            for i, el in enumerate(iterator):
                # display progress indicator
                sys.stdout.write('Progress: %2.0f%%\r' %
                                 (100 * i / float(n_examples)))
                sys.stdout.flush()

                true_label = el[1]
                target = _select_adversarial_target(mode, label, true_label,
                                                    n_classes)

                # Re-read the audio from disk instead of using el[0]
                # (original note: seems to be a bug when reading from h5).
                x = _read_mono_audio(root_dir, el[2])
                Mag, Phs = compute_fft(x)
                Mag = Mag[:1200, :513]
                Phs = Phs[:1200, :513]
                epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.)

                X_adv, P_adv = find_adversary(
                    model=dnn_model,
                    X0=Mag,
                    label=target,
                    fwd_xform=fwd_xform,
                    back_xform=back_xform,
                    # Extend the phase with the negated, mirrored columns.
                    # `//` keeps the slice bound an integer under both
                    # classic and true division.
                    P0=np.hstack(
                        (Phs, -Phs[:, -2:-dataset.nfft // 2 - 1:-1])),
                    mu=.01,
                    epsilon=epsilon,
                    maxits=50,
                    stop_thresh=0.5,
                    griffin_lim=False)

                if save_adversary_audio:
                    nfft = 2 * (X_adv.shape[1] - 1)
                    nhop = nfft // 2
                    x_adv = overlap_add(
                        np.hstack((X_adv, X_adv[:, -2:-nfft // 2 - 1:-1])) *
                        np.exp(1j * P_adv), nfft, nhop)
                    audiolab.wavwrite(
                        x_adv, os.path.join(save_adversary_audio, el[2]),
                        22050, 'pcm16')

                # File-level decision: mean network output over frames.
                fpass = fprop(X_adv)
                conf = np.sum(fpass, axis=0) / float(fpass.shape[0])
                dnn_label = np.argmax(conf)

                # truncate to correct length
                ext = min(Mag.shape[0], X_adv.shape[0])
                Mag = Mag[:ext, :]
                X_adv = X_adv[:ext, :]
                X_diff = Mag - X_adv
                out_snr = 20 * np.log10(
                    np.linalg.norm(Mag) / np.linalg.norm(X_diff))

                dnn_writer.writerow([
                    dataset.file_list[i], true_label, dnn_label, out_snr,
                    conf[dnn_label]
                ])
                print(
                    'Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'
                    .format(mode, true_label, target, dnn_label,
                            conf[dnn_label], out_snr))

                if aux_writer is not None:
                    fft_agg = aggregate_features(dnn_model, X_adv,
                                                 which_layers)
                    # Majority vote over the aux model's predictions.
                    aux_vote = np.argmax(
                        np.bincount(
                            np.array(aux_model.predict(fft_agg),
                                     dtype='int')))
                    aux_writer.writerow(
                        [dataset.file_list[i], true_label, aux_vote])
                    print('AUX adversarial label: {}'.format(aux_vote))
    finally:
        # Close the aux file even when an iteration raises.
        if aux_fname is not None:
            aux_fname.close()
    print('')


def _select_adversarial_target(mode, label, true_label, n_classes):
    """Return the class the adversary is steered towards for the given mode."""
    if mode == 'all_same':
        return label
    if mode == 'perfect':
        return true_label
    if mode == 'random':
        return np.random.randint(n_classes)
    if mode == 'all_wrong':
        # remove ground truth label from set of options
        cand = np.setdiff1d(np.arange(n_classes), np.array(true_label))
        return cand[np.random.randint(len(cand))]
    raise ValueError("Unknown mode '{}'".format(mode))


def _read_mono_audio(root_dir, rel_path, fs=22050, seglen=30):
    """Read an audio file, mix to one channel, keep `seglen` s, resample to `fs`."""
    if rel_path.endswith('.wav'):
        read_fun = audiolab.wavread
    elif rel_path.endswith('.au'):
        read_fun = audiolab.auread
    elif rel_path.endswith('.mp3'):
        read_fun = read_mp3
    else:
        raise ValueError(
            "Unsupported audio file '{}': expected .wav, .au or .mp3".format(
                rel_path))

    x, fstmp, _ = read_fun(os.path.join(root_dir, rel_path))

    # make mono
    if len(x.shape) != 1:
        x = np.sum(x, axis=1) / 2.

    # Keep at most `seglen` seconds at the file's own sample rate.
    x = x[:fstmp * seglen]
    if fstmp != fs:
        x = samplerate.resample(x, fs / float(fstmp), 'sinc_best')
    return x
for t in targets: # search for adversary X_adv, P_adv = find_adversary(model=dnn_model, X0=X0, label=t, P0=Phs, mu=mu, epsilon=epsilon, maxits=maxits, stop_thresh=stop_thresh, griffin_lim=True) # get time-domain representation x_adv = overlap_add( np.hstack((X_adv, X_adv[:, -2:-nfft / 2 - 1:-1])) * np.exp(1j * P_adv)) out_snr = 20 * np.log10( np.linalg.norm(x[nfft:-nfft]) / np.linalg.norm(x[nfft:-nfft] - x_adv[nfft:-nfft])) # BEFORE FILTERING # =========================================== # dnn prediction pred = np.argmax(np.sum(fprop(X_adv), axis=0)) if pred == t: dnn_file.write('{}\t'.format(int(out_snr + .5))) else: dnn_file.write('{}\t'.format('na')) # aux prediction
def file_misclass_error_printf(dnn_model, root_dir, dataset, save_file, mode='all_same', label=0, snr=30, aux_model=None, aux_save_file=None, which_layers=None, save_adversary_audio=None, fwd_xform=None, back_xform=None):
    """
    Compute file-level classification results on adversarial inputs.

    For every element of the dataset's sequential iterator the audio file is
    re-read from disk, turned into a magnitude/phase spectrogram with
    `compute_fft`, and passed to `find_adversary` with a target class chosen
    by `mode`. The adversarial spectrogram is classified by averaging the
    network outputs over frames and taking the arg-max, and a tab-separated
    row [file, true_label, dnn_label, out_snr, confidence] is written to
    `save_file`.

    Arguments:
        dnn_model: model exposing get_input_space(), get_output_space()
            and fprop().
        root_dir: directory joined with each element's file path (el[2]).
        dataset: object exposing `targets`, `file_list`, `nfft` and
            `iterator(...)`.
        save_file: path of the tab-separated DNN result file.
        mode: 'all_same' (target is `label`), 'perfect' (target is the true
            label), 'random' (uniform over classes) or 'all_wrong' (uniform
            over classes other than the true label).
        label: target class for mode 'all_same'.
        snr: dB value used for the perturbation bound
            epsilon = ||Mag|| / n_rows / 10**(snr/20).
        aux_model: optional model exposing predict(); its predictions on
            aggregate_features(...) are majority-voted and written to
            `aux_save_file`.
        aux_save_file: path of the tab-separated aux result file; required
            when `aux_model` is used.
        which_layers: forwarded unchanged to aggregate_features.
        save_adversary_audio: if truthy, directory receiving the
            re-synthesised adversarial audio (22050 Hz, pcm16 wav).
        fwd_xform, back_xform: callables forwarded to find_adversary;
            fwd_xform is also applied before the forward pass for
            Conv2DSpace inputs. Both default to the identity.

    Raises:
        ValueError: for an unsupported `mode`, for `aux_model` without
            `aux_save_file`, or for a file whose extension is not .wav,
            .au or .mp3.
    """
    import sys  # function-scope import: only the progress indicator needs it

    supported_modes = ('all_same', 'perfect', 'random', 'all_wrong')
    # Fail before any output file is truncated, rather than with a NameError
    # on the first loop iteration.
    if mode not in supported_modes:
        raise ValueError("Unknown mode '{}'; expected one of: {}".format(mode, ', '.join(supported_modes)))
    if aux_model and aux_save_file is None:
        raise ValueError('aux_save_file must be given when aux_model is used')

    if fwd_xform is None:
        print('fwd_xform=None, using identity')
        fwd_xform = lambda X: X
    if back_xform is None:
        print('back_xform=None, using identity')
        back_xform = lambda X: X

    n_classes = len(dataset.targets)

    # Compile the network's forward pass once.
    input_space = dnn_model.get_input_space()
    X = input_space.make_theano_batch()
    Y = dnn_model.fprop(X)
    fprop_theano = theano.function([X], Y)

    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))

        def fprop(batch):
            # Stack windows of `tframes` rows as flat vectors, then forward.
            nframes = batch.shape[0]
            thop = 1.
            step = int(tframes/thop)  # plain int(); np.int no longer exists
            sup = np.arange(0, nframes-tframes+1, step)
            data = np.vstack([np.reshape(batch[i:i+tframes, :], (tframes*dim,)) for i in sup])
            data = fwd_xform(data)
            return fprop_theano(view_converter.get_formatted_batch(data, input_space))
    else:
        # NOTE(review): fwd_xform is not applied on this path although it is
        # on the Conv2DSpace path - confirm this is intended.
        fprop = fprop_theano

    n_examples = len(dataset.file_list)
    target_space = dnn_model.get_output_space()
    data_specs = (CompositeSpace((input_space, target_space)), ("songlevel-features", "targets"))
    iterator = dataset.iterator(mode='sequential', batch_size=1, data_specs=data_specs)

    fs = 22050   # sample rate the audio is resampled to
    seglen = 30  # seconds of audio kept per file

    aux_fname = None
    aux_writer = None
    try:
        if aux_model:
            aux_fname = open(aux_save_file, 'w')
            aux_writer = csv.writer(aux_fname, delimiter='\t')

        with open(save_file, 'w') as fname:
            dnn_writer = csv.writer(fname, delimiter='\t')

            for i, el in enumerate(iterator):

                # display progress indicator
                sys.stdout.write('Progress: %2.0f%%\r' % (100*i/float(n_examples)))
                sys.stdout.flush()

                true_label = el[1]

                if mode == 'all_same':
                    target = label
                elif mode == 'perfect':
                    target = true_label
                elif mode == 'random':
                    target = np.random.randint(n_classes)
                else:  # 'all_wrong' (mode was validated above)
                    # remove ground truth label from set of options
                    cand = np.setdiff1d(np.arange(n_classes), np.array(true_label))
                    target = cand[np.random.randint(len(cand))]

                # re-read audio (seems to be bug when reading from h5)
                f = el[2]
                if f.endswith('.wav'):
                    read_fun = audiolab.wavread
                elif f.endswith('.au'):
                    read_fun = audiolab.auread
                elif f.endswith('.mp3'):
                    read_fun = read_mp3
                else:
                    # Previously this fell through and reused the reader of
                    # the preceding file (or raised NameError on the first).
                    raise ValueError("Unsupported audio file '{}': expected .wav, .au or .mp3".format(f))

                x, fstmp, _ = read_fun(os.path.join(root_dir, f))

                # make mono
                if len(x.shape) != 1:
                    x = np.sum(x, axis=1)/2.

                x = x[:fstmp*seglen]
                if fstmp != fs:
                    x = samplerate.resample(x, fs/float(fstmp), 'sinc_best')

                Mag, Phs = compute_fft(x)
                Mag = Mag[:1200, :513]
                Phs = Phs[:1200, :513]
                epsilon = np.linalg.norm(Mag)/Mag.shape[0]/10**(snr/20.)

                X_adv, P_adv = find_adversary(
                    model=dnn_model,
                    X0=Mag,
                    label=target,
                    fwd_xform=fwd_xform,
                    back_xform=back_xform,
                    # `//` keeps the slice bound an integer under both
                    # classic and true division.
                    P0=np.hstack((Phs, -Phs[:, -2:-dataset.nfft//2-1:-1])),
                    mu=.01,
                    epsilon=epsilon,
                    maxits=50,
                    stop_thresh=0.5,
                    griffin_lim=False
                )

                if save_adversary_audio:
                    nfft = 2*(X_adv.shape[1]-1)
                    nhop = nfft//2
                    x_adv = overlap_add(np.hstack((X_adv, X_adv[:, -2:-nfft//2-1:-1])) * np.exp(1j*P_adv), nfft, nhop)
                    audiolab.wavwrite(x_adv, os.path.join(save_adversary_audio, el[2]), 22050, 'pcm16')

                # File-level decision: mean network output over frames.
                fpass = fprop(X_adv)
                conf = np.sum(fpass, axis=0) / float(fpass.shape[0])
                dnn_label = np.argmax(conf)

                # truncate to correct length
                ext = min(Mag.shape[0], X_adv.shape[0])
                Mag = Mag[:ext, :]
                X_adv = X_adv[:ext, :]
                X_diff = Mag-X_adv
                out_snr = 20*np.log10(np.linalg.norm(Mag)/np.linalg.norm(X_diff))

                dnn_writer.writerow([dataset.file_list[i], true_label, dnn_label, out_snr, conf[dnn_label]])
                print('Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'.format(mode, true_label, target, dnn_label, conf[dnn_label], out_snr))

                if aux_writer is not None:
                    fft_agg = aggregate_features(dnn_model, X_adv, which_layers)
                    # Majority vote over the aux model's predictions.
                    aux_vote = np.argmax(np.bincount(np.array(aux_model.predict(fft_agg), dtype='int')))
                    aux_writer.writerow([dataset.file_list[i], true_label, aux_vote])
                    print('AUX adversarial label: {}'.format(aux_vote))
    finally:
        # Close the aux file even when an iteration raises.
        if aux_fname is not None:
            aux_fname.close()
    print('')