def file_misclass_error_printf(dnn_model, aux_model, which_layers, data_dir, file_list, filter_cutoff, dnn_save_file, aux_save_file): # closures def dnn_classify(X): batch = dnn_model.get_input_space().make_theano_batch() fprop = theano.function([batch], dnn_model.fprop(batch)) prediction = np.argmax(np.sum(fprop(X), axis=0)) return prediction def aux_classify(X): Xagg = aggregate_features(dnn_model, X, which_layers) prediction = np.argmax( np.bincount(np.array(aux_model.predict(Xagg), dtype='int'))) return prediction # filter coeffs b, a = sp.signal.butter(4, filter_cutoff / (22050. / 2.)) dnn_file = open(dnn_save_file, 'w') aux_file = open(aux_save_file, 'w') label_list = { 'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9 } for i, fname in enumerate(file_list): print 'Processing file {} of {}'.format(i + 1, len(file_list)) true_label = label_list[fname.split('/')[0]] x, _, _ = audiolab.wavread(os.path.join(data_dir, fname)) x = sp.signal.lfilter(b, a, x) X, _ = compute_fft(x) X = np.array(X[:, :513], dtype=np.float32) dnn_pred = dnn_classify(X) dnn_file.write('{fname}\t{true_label}\t{pred_label}\n'.format( fname=fname, true_label=true_label, pred_label=dnn_pred)) aux_pred = aux_classify(X) aux_file.write('{fname}\t{true_label}\t{pred_label}\n'.format( fname=fname, true_label=true_label, pred_label=aux_pred)) dnn_file.close() aux_file.close()
def file_misclass_error_printf(dnn_model, aux_model, which_layers, data_dir, file_list, filter_cutoff, dnn_save_file, aux_save_file): # closures def dnn_classify(X): batch = dnn_model.get_input_space().make_theano_batch() fprop = theano.function([batch], dnn_model.fprop(batch)) prediction = np.argmax(np.sum(fprop(X), axis=0)) return prediction def aux_classify(X): Xagg = aggregate_features(dnn_model, X, which_layers) prediction = np.argmax(np.bincount(np.array(aux_model.predict(Xagg), dtype='int'))) return prediction # filter coeffs b,a = sp.signal.butter(4, filter_cutoff/(22050./2.)) dnn_file = open(dnn_save_file, 'w') aux_file = open(aux_save_file, 'w') label_list = {'blues':0, 'classical':1, 'country':2, 'disco':3, 'hiphop':4, 'jazz':5, 'metal':6, 'pop':7, 'reggae':8, 'rock':9} for i, fname in enumerate(file_list): print 'Processing file {} of {}'.format(i+1, len(file_list)) true_label = label_list[fname.split('/')[0]] x,_,_ = audiolab.wavread(os.path.join(data_dir, fname)) x = sp.signal.lfilter(b,a,x) X,_ = compute_fft(x) X = np.array(X[:,:513], dtype=np.float32) dnn_pred = dnn_classify(X) dnn_file.write('{fname}\t{true_label}\t{pred_label}\n'.format( fname=fname, true_label=true_label, pred_label=dnn_pred)) aux_pred = aux_classify(X) aux_file.write('{fname}\t{true_label}\t{pred_label}\n'.format( fname=fname, true_label=true_label, pred_label=aux_pred)) dnn_file.close() aux_file.close()
x = np.sum(x, axis=1)/2. # mono if fs != 22050: print 'resampling to 22050 hz:' x = samplerate.resample(x, 22050./fs, 'sinc_best') fs = 22050 # truncate input to multiple of hopsize nframes = (len(x)-nfft)/nhop x = x[:(nframes-1)*nhop + nfft] # smooth boundaries to prevent a click x[:512] *= win[:512] x[-512:] *= win[512:] # compute mag. spectra Mag, Phs = compute_fft(x, nfft, nhop) X0 = Mag[:,:input_space.dim] epsilon = np.linalg.norm(X0)/X0.shape[0]/10**(snr/20.) # write file name dnn_file.write('{}\t'.format(fname)) dnn_file_filt.write('{}\t'.format(fname)) aux_file.write('{}\t'.format(fname)) aux_file_filt.write('{}\t'.format(fname)) for t in targets: # search for adversary X_adv, P_adv = find_adversary( model=dnn_model,
x = np.sum(x, axis=1) / 2. # mono if fs != 22050: print 'resampling to 22050 hz:' x = samplerate.resample(x, 22050. / fs, 'sinc_best') fs = 22050 # truncate input to multiple of hopsize nframes = (len(x) - nfft) / nhop x = x[:(nframes - 1) * nhop + nfft] # smooth boundaries to prevent a click x[:512] *= win[:512] x[-512:] *= win[512:] # compute mag. spectra Mag, Phs = compute_fft(x, nfft, nhop) X0 = Mag[:, :input_space.dim] epsilon = np.linalg.norm(X0) / X0.shape[0] / 10**(snr / 20.) # write file name dnn_file.write('{}\t'.format(fname)) dnn_file_filt.write('{}\t'.format(fname)) aux_file.write('{}\t'.format(fname)) aux_file_filt.write('{}\t'.format(fname)) for t in targets: # search for adversary X_adv, P_adv = find_adversary(model=dnn_model, X0=X0,
x, fstmp, _ = read_fun(f) # make mono if len(x.shape) != 1: x = np.sum(x, axis=1)/2. seglen=30 x = x[:fstmp*seglen] fs = 22050 if fstmp != fs: x = samplerate.resample(x, fs/float(fstmp), 'sinc_best') # compute mag. spectra Mag, Phs = compute_fft(x, nfft, nhop) X0 = Mag[:,:dim] epsilon = np.linalg.norm(X0)/X0.shape[0]/10**(snr/20.) # write file name dnn_file.write('{}\t'.format(fname)) if args.aux_model: aux_file.write('{}\t'.format(fname)) for t in targets: # search for adversary X_adv, P_adv = find_adversary( model=dnn_model, X0=X0,
# Script section: compare a clean audio file against its adversarial
# counterpart — report waveform SNR and build dB-scaled spectrograms
# (presumably for plotting further down the file — confirm).
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter, description='')
parser.add_argument('--true_file')
parser.add_argument('--adversary')
args = parser.parse_args()

# load sndfile
x,_,_ = audiolab.wavread(args.true_file)
x_adv,_,_ = audiolab.wavread(args.adversary)

# SNR over the common length, ignoring `ign` samples at each boundary
L = min(len(x), len(x_adv))
ign = 2048
snr = 20*np.log10(np.linalg.norm(x[ign:L-ign-1])/np.linalg.norm(x[ign:L-ign-1]-x_adv[ign:L-ign-1]))
print 'SNR: ', snr

# STFT (513 bins kept — presumably nfft=1024; confirm in compute_fft)
X = compute_fft(x)[0][:,:513]
X_adv = compute_fft(x_adv)[0][:,:513]

rng = 1+np.arange(400)  # frames 1..400 (first frame skipped)
Xt = X[rng,:]
X = 20*np.log10(Xt)  # magnitudes to dB
Xt_adv = X_adv[rng,:]
X_adv = 20*np.log10(Xt_adv)

# nrm = np.max(X)/1.
# X /= nrm
# X_adv /= nrm
vmin = np.min(X)  # shared color-scale limits for the two spectrograms
vmax = np.max(X)
def file_misclass_error_printf(dnn_model, root_dir, dataset, save_file, mode='all_same', label=0, snr=30, aux_model=None, aux_save_file=None, which_layers=None, save_adversary_audio=None, fwd_xform=None, back_xform=None): """ Function to compute the file-level classification error by classifying individual frames and then voting for the class with highest cumulative probability """ if fwd_xform is None: print 'fwd_xform=None, using identity' fwd_xform = lambda X: X if back_xform is None: print 'back_xform=None, using identity' back_xform = lambda X: X n_classes = len(dataset.targets) X = dnn_model.get_input_space().make_theano_batch() Y = dnn_model.fprop(X) fprop_theano = theano.function([X], Y) input_space = dnn_model.get_input_space() if isinstance(input_space, Conv2DSpace): tframes, dim = input_space.shape view_converter = DefaultViewConverter((tframes, dim, 1)) else: dim = input_space.dim tframes = 1 view_converter = None if view_converter is not None: def fprop(batch): nframes = batch.shape[0] thop = 1. 
sup = np.arange(0, nframes - tframes + 1, np.int(tframes / thop)) data = np.vstack([ np.reshape(batch[i:i + tframes, :], (tframes * dim, )) for i in sup ]) data = fwd_xform(data) return fprop_theano( view_converter.get_formatted_batch(data, input_space)) else: fprop = fprop_theano n_examples = len(dataset.file_list) target_space = dnn_model.get_output_space() #VectorSpace(dim=n_classes) feat_space = dnn_model.get_input_space( ) #VectorSpace(dim=dataset.nfft//2+1, dtype='complex64') data_specs = (CompositeSpace( (feat_space, target_space)), ("songlevel-features", "targets")) iterator = dataset.iterator(mode='sequential', batch_size=1, data_specs=data_specs) if aux_model: aux_fname = open(aux_save_file, 'w') aux_writer = csv.writer(aux_fname, delimiter='\t') with open(save_file, 'w') as fname: dnn_writer = csv.writer(fname, delimiter='\t') for i, el in enumerate(iterator): # display progress indicator 'Progress: %2.0f%%\r' % (100 * i / float(n_examples)) Mag, Phs = np.abs(el[0], dtype=np.float32), np.angle(el[0]) epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.) if mode == 'all_same': target = label elif mode == 'perfect': target = el[1] elif mode == 'random': target = np.random.randint(n_classes) elif mode == 'all_wrong': cand = np.setdiff1d(np.arange(n_classes), np.array( el[1])) # remove ground truth label from set of options target = cand[np.random.randint(len(cand))] if 1: # re-read audio (seems to be bug when reading from h5) f = el[2] if f.endswith('.wav'): read_fun = audiolab.wavread elif f.endswith('.au'): read_fun = audiolab.auread elif f.endswith('.mp3'): read_fun = read_mp3 x, fstmp, _ = read_fun(os.path.join(root_dir, f)) # make mono if len(x.shape) != 1: x = np.sum(x, axis=1) / 2. seglen = 30 x = x[:fstmp * seglen] fs = 22050 if fstmp != fs: x = samplerate.resample(x, fs / float(fstmp), 'sinc_best') Mag, Phs = compute_fft(x) Mag = Mag[:1200, :513] Phs = Phs[:1200, :513] epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.) 
else: raise ValueError( "Check that song-level iterator is indeed returning 'raw data'" ) X_adv, P_adv = find_adversary( model=dnn_model, X0=Mag, label=target, fwd_xform=fwd_xform, back_xform=back_xform, P0=np.hstack((Phs, -Phs[:, -2:-dataset.nfft / 2 - 1:-1])), mu=.01, epsilon=epsilon, maxits=50, stop_thresh=0.5, griffin_lim=False #True ) if save_adversary_audio: nfft = 2 * (X_adv.shape[1] - 1) nhop = nfft // 2 x_adv = overlap_add( np.hstack((X_adv, X_adv[:, -2:-nfft // 2 - 1:-1])) * np.exp(1j * P_adv), nfft, nhop) audiolab.wavwrite(x_adv, os.path.join(save_adversary_audio, el[2]), 22050, 'pcm16') #frame_labels = np.argmax(fprop(X_adv), axis=1) #hist = np.bincount(frame_labels, minlength=n_classes) fpass = fprop(X_adv) conf = np.sum(fpass, axis=0) / float(fpass.shape[0]) dnn_label = np.argmax(conf) #np.argmax(hist) # most used label true_label = el[1] # truncate to correct length ext = min(Mag.shape[0], X_adv.shape[0]) Mag = Mag[:ext, :] X_adv = X_adv[:ext, :] X_diff = Mag - X_adv out_snr = 20 * np.log10( np.linalg.norm(Mag) / np.linalg.norm(X_diff)) dnn_writer.writerow([ dataset.file_list[i], true_label, dnn_label, out_snr, conf[dnn_label] ]) print 'Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'.format( mode, true_label, target, dnn_label, conf[dnn_label], out_snr) if aux_model: fft_agg = aggregate_features(dnn_model, X_adv, which_layers) aux_vote = np.argmax( np.bincount( np.array(aux_model.predict(fft_agg), dtype='int'))) aux_writer.writerow( [dataset.file_list[i], true_label, aux_vote]) print 'AUX adversarial label: {}'.format(aux_vote) if aux_model: aux_fname.close() print ''
# NOTE(review): script fragment — `parser` and the '--true_file' argument
# are defined above this excerpt.
parser.add_argument('--adversary')
args = parser.parse_args()

# load sndfile
x, _, _ = audiolab.wavread(args.true_file)
x_adv, _, _ = audiolab.wavread(args.adversary)

# SNR over the common length, ignoring `ign` samples at each boundary
L = min(len(x), len(x_adv))
ign = 2048
snr = 20 * np.log10(
    np.linalg.norm(x[ign:L - ign - 1]) /
    np.linalg.norm(x[ign:L - ign - 1] - x_adv[ign:L - ign - 1]))
print 'SNR: ', snr

# STFT (513 bins kept — presumably nfft=1024; confirm in compute_fft)
X = compute_fft(x)[0][:, :513]
X_adv = compute_fft(x_adv)[0][:, :513]

rng = 1 + np.arange(400)  # frames 1..400 (first frame skipped)
Xt = X[rng, :]
X = 20 * np.log10(Xt)  # magnitudes to dB
Xt_adv = X_adv[rng, :]
X_adv = 20 * np.log10(Xt_adv)

# nrm = np.max(X)/1.
# X /= nrm
# X_adv /= nrm
vmin = np.min(X)  # shared color-scale limits, presumably for plots below
vmax = np.max(X)
def file_misclass_error_printf(dnn_model, root_dir, dataset, save_file,
                               mode='all_same', label=0, snr=30,
                               aux_model=None, aux_save_file=None,
                               which_layers=None, save_adversary_audio=None,
                               fwd_xform=None, back_xform=None):
    """
    Function to compute the file-level classification error by classifying
    individual frames and then voting for the class with highest cumulative
    probability.

    For each song in `dataset`, an adversarial spectrogram is searched for
    (targeting a label chosen by `mode`), and the DNN's song-level vote on
    the adversary is written to `save_file` as a TSV row
    [fname, true_label, dnn_label, out_snr, confidence]; optionally the
    auxiliary model's vote is written to `aux_save_file`.

    Parameters
    ----------
    dnn_model : trained model; its fprop is used for frame classification
    root_dir : directory holding the raw audio files
    dataset : song-level dataset providing file_list, targets, iterator()
    save_file : output TSV path for DNN results
    mode : target-label policy: 'all_same' (always `label`), 'perfect'
        (ground truth), 'random', or 'all_wrong' (random wrong label)
    label : target class used when mode == 'all_same'
    snr : requested perturbation signal-to-noise ratio in dB
    aux_model : optional auxiliary classifier on aggregated DNN features
    aux_save_file : output TSV path for aux results (required if aux_model)
    which_layers : DNN layers aggregated for the auxiliary model
    save_adversary_audio : optional directory to write adversarial wavs
    fwd_xform / back_xform : optional feature transform and its inverse
        (identity when None)
    """
    if fwd_xform is None:
        print 'fwd_xform=None, using identity'
        fwd_xform = lambda X: X
    if back_xform is None:
        print 'back_xform=None, using identity'
        back_xform = lambda X: X

    n_classes = len(dataset.targets)

    # Compile the model's forward pass once.
    X = dnn_model.get_input_space().make_theano_batch()
    Y = dnn_model.fprop(X)
    fprop_theano = theano.function([X], Y)

    input_space = dnn_model.get_input_space()
    if isinstance(input_space, Conv2DSpace):
        # Convolutional input: frames are grouped into tframes x dim patches.
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))
    else:
        dim = input_space.dim
        tframes = 1
        view_converter = None

    if view_converter is not None:
        def fprop(batch):
            # Slice the frame sequence into non-overlapping tframes-long
            # windows (thop=1 -> hop of tframes frames), flatten each window
            # and format it for the conv input space before forward-prop.
            nframes = batch.shape[0]
            thop = 1.
            sup = np.arange(0, nframes - tframes + 1, np.int(tframes / thop))
            data = np.vstack([np.reshape(batch[i:i + tframes, :], (tframes * dim,)) for i in sup])
            data = fwd_xform(data)
            return fprop_theano(view_converter.get_formatted_batch(data, input_space))
    else:
        fprop = fprop_theano

    n_examples = len(dataset.file_list)
    target_space = dnn_model.get_output_space()  #VectorSpace(dim=n_classes)
    feat_space = dnn_model.get_input_space()  #VectorSpace(dim=dataset.nfft//2+1, dtype='complex64')
    data_specs = (CompositeSpace((feat_space, target_space)), ("songlevel-features", "targets"))
    iterator = dataset.iterator(mode='sequential', batch_size=1, data_specs=data_specs)

    if aux_model:
        # NOTE(review): aux_fname is only closed at the end of the happy
        # path; an exception inside the loop leaks it.
        aux_fname = open(aux_save_file, 'w')
        aux_writer = csv.writer(aux_fname, delimiter='\t')

    with open(save_file, 'w') as fname:
        dnn_writer = csv.writer(fname, delimiter='\t')
        for i, el in enumerate(iterator):
            # display progress indicator
            # NOTE(review): this string is built but never printed — the
            # statement has no effect as written.
            'Progress: %2.0f%%\r' % (100 * i / float(n_examples))

            Mag, Phs = np.abs(el[0], dtype=np.float32), np.angle(el[0])
            # perturbation budget from the requested SNR (dB)
            epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.)

            # choose the adversarial target label
            if mode == 'all_same':
                target = label
            elif mode == 'perfect':
                target = el[1]
            elif mode == 'random':
                target = np.random.randint(n_classes)
            elif mode == 'all_wrong':
                cand = np.setdiff1d(np.arange(n_classes), np.array(el[1]))  # remove ground truth label from set of options
                target = cand[np.random.randint(len(cand))]
            # NOTE(review): no else branch — an unrecognized mode raises a
            # NameError on `target` below.

            if 1:  # re-read audio (seems to be bug when reading from h5)
                f = el[2]
                if f.endswith('.wav'):
                    read_fun = audiolab.wavread
                elif f.endswith('.au'):
                    read_fun = audiolab.auread
                elif f.endswith('.mp3'):
                    read_fun = read_mp3
                x, fstmp, _ = read_fun(os.path.join(root_dir, f))
                # make mono
                if len(x.shape) != 1:
                    x = np.sum(x, axis=1) / 2.
                seglen = 30  # keep only the first 30 seconds
                x = x[:fstmp * seglen]
                fs = 22050
                if fstmp != fs:
                    x = samplerate.resample(x, fs / float(fstmp), 'sinc_best')
                Mag, Phs = compute_fft(x)
                # keep first 1200 frames, 513 non-redundant bins
                Mag = Mag[:1200, :513]
                Phs = Phs[:1200, :513]
                epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.)
            else:
                # unreachable while the `if 1` guard above is hard-coded
                raise ValueError("Check that song-level iterator is indeed returning 'raw data'")

            # search for an adversarial spectrogram within the epsilon ball
            X_adv, P_adv = find_adversary(
                model=dnn_model,
                X0=Mag,
                label=target,
                fwd_xform=fwd_xform,
                back_xform=back_xform,
                P0=np.hstack((Phs, -Phs[:, -2:-dataset.nfft / 2 - 1:-1])),
                mu=.01,
                epsilon=epsilon,
                maxits=50,
                stop_thresh=0.5,
                griffin_lim=False  #True
            )

            if save_adversary_audio:
                # resynthesize audio from the mirrored adversarial spectrum
                # and phases, then write a 16-bit wav
                nfft = 2 * (X_adv.shape[1] - 1)
                nhop = nfft // 2
                x_adv = overlap_add(np.hstack((X_adv, X_adv[:, -2:-nfft // 2 - 1:-1])) * np.exp(1j * P_adv), nfft, nhop)
                audiolab.wavwrite(x_adv, os.path.join(save_adversary_audio, el[2]), 22050, 'pcm16')

            #frame_labels = np.argmax(fprop(X_adv), axis=1)
            #hist = np.bincount(frame_labels, minlength=n_classes)
            fpass = fprop(X_adv)
            # mean per-class confidence over all frames
            conf = np.sum(fpass, axis=0) / float(fpass.shape[0])
            dnn_label = np.argmax(conf)  #np.argmax(hist) # most used label
            true_label = el[1]

            # truncate to correct length
            ext = min(Mag.shape[0], X_adv.shape[0])
            Mag = Mag[:ext, :]
            X_adv = X_adv[:ext, :]
            X_diff = Mag - X_adv
            # realized SNR of the adversarial perturbation
            out_snr = 20 * np.log10(np.linalg.norm(Mag) / np.linalg.norm(X_diff))

            dnn_writer.writerow([dataset.file_list[i], true_label, dnn_label, out_snr, conf[dnn_label]])
            print 'Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'.format(mode, true_label, target, dnn_label, conf[dnn_label], out_snr)

            if aux_model:
                # majority vote of the aux model on aggregated features
                fft_agg = aggregate_features(dnn_model, X_adv, which_layers)
                aux_vote = np.argmax(np.bincount(np.array(aux_model.predict(fft_agg), dtype='int')))
                aux_writer.writerow([dataset.file_list[i], true_label, aux_vote])
                print 'AUX adversarial label: {}'.format(aux_vote)

    if aux_model:
        aux_fname.close()
    print ''