예제 #1
0
        for t in targets:

            # search for adversary
            X_adv, P_adv = find_adversary(
                model=dnn_model, 
                X0=X0, 
                label=t, 
                P0=Phs, 
                mu=mu, 
                epsilon=epsilon, 
                maxits=maxits, 
                stop_thresh=stop_thresh, 
                griffin_lim=True)

            # get time-domain representation
            x_adv   = overlap_add( np.hstack((X_adv, X_adv[:,-2:-nfft/2-1:-1])) * np.exp(1j*P_adv))
            out_snr = 20*np.log10(np.linalg.norm(x[nfft:-nfft]) / np.linalg.norm(x[nfft:-nfft]-x_adv[nfft:-nfft]))

            # BEFORE FILTERING
            # ===========================================
            # dnn prediction
            pred = np.argmax(np.sum(fprop(X_adv), axis=0))
            if pred == t:
                dnn_file.write('{}\t'.format(int(out_snr+.5)))
            else:
                dnn_file.write('{}\t'.format('na'))

            # aux prediction
            X_adv_agg = aggregate_features(dnn_model, X_adv, which_layers)
            pred = np.argmax(np.bincount(np.array(aux_model.predict(X_adv_agg), dtype='int')))
            if pred == t:
예제 #2
0
def file_misclass_error_printf(dnn_model,
                               root_dir,
                               dataset,
                               save_file,
                               mode='all_same',
                               label=0,
                               snr=30,
                               aux_model=None,
                               aux_save_file=None,
                               which_layers=None,
                               save_adversary_audio=None,
                               fwd_xform=None,
                               back_xform=None):
    """
    Craft an adversarial example for every file in `dataset`, classify it at
    the file level and log the outcome.

    For each file the audio is re-read from `root_dir`, mixed down to mono,
    truncated to its first `30 * fstmp` samples (`fstmp` is the second value
    returned by the reader, presumably the sample rate), resampled to 22050 Hz
    if needed and converted to magnitude/phase features with `compute_fft`.
    `find_adversary` then perturbs the magnitudes towards a target label.  The
    file-level DNN label is the class with the highest frame-averaged network
    output.

    One tab-separated row `(file, true label, DNN label, output SNR in dB,
    confidence of the DNN label)` is written to `save_file` per file.  When
    `aux_model` is given, a row `(file, true label, aux label)` is also written
    to `aux_save_file`, where the aux label is the majority vote of
    `aux_model.predict` over the features from `aggregate_features`.

    Parameters
    ----------
    dnn_model : model providing `get_input_space`, `get_output_space`, `fprop`
    root_dir : directory that the file names yielded by the iterator are
        joined to
    dataset : object providing `targets`, `file_list`, `nfft` and `iterator`
    save_file : path of the tab-separated DNN result file (overwritten)
    mode : how the adversarial target label is chosen
        'all_same'  -> always `label`
        'perfect'   -> the true label of the file
        'random'    -> a random class index
        'all_wrong' -> a random class index different from the true label
    label : target label used when `mode == 'all_same'`
    snr : sets the step size passed to `find_adversary` as
        `norm(Mag) / n_frames / 10**(snr / 20.)`
    aux_model : optional second classifier with a `predict` method
    aux_save_file : path of the tab-separated aux result file (overwritten);
        required when `aux_model` is given
    which_layers : forwarded to `aggregate_features`
    save_adversary_audio : if truthy, directory the resynthesised adversarial
        audio is written to (16-bit PCM at 22050 Hz)
    fwd_xform, back_xform : transforms forwarded to `find_adversary`;
        `fwd_xform` is also applied to the windowed frames before the network
        when the model has a `Conv2DSpace` input.  Both default to identity.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values, or a file has an
        extension other than '.wav', '.au' or '.mp3'.
    """
    import sys  # local import: only used for the in-place progress indicator

    # Fail fast: an unknown mode used to leave `target` unbound (or stale
    # from the previous file) deep inside the loop.
    valid_modes = ('all_same', 'perfect', 'random', 'all_wrong')
    if mode not in valid_modes:
        raise ValueError('Unknown mode {!r}; expected one of {}'.format(
            mode, valid_modes))

    if fwd_xform is None:
        print('fwd_xform=None, using identity')
        fwd_xform = lambda X: X
    if back_xform is None:
        print('back_xform=None, using identity')
        back_xform = lambda X: X

    n_classes = len(dataset.targets)
    target_fs = 22050  # sample rate every file is brought to before analysis

    input_space = dnn_model.get_input_space()
    X = input_space.make_theano_batch()
    Y = dnn_model.fprop(X)
    fprop_theano = theano.function([X], Y)

    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))

        def fprop(batch):
            # Cut the frame sequence into windows of `tframes` frames, flatten
            # each window to one row and feed the batch of windows to the net.
            nframes = batch.shape[0]
            thop = 1.  # window-hop divisor; 1. gives non-overlapping windows
            # builtin int replaces the removed `np.int` alias (same type)
            sup = np.arange(0, nframes - tframes + 1, int(tframes / thop))

            data = np.vstack([
                np.reshape(batch[i:i + tframes, :], (tframes * dim, ))
                for i in sup
            ])
            data = fwd_xform(data)

            return fprop_theano(
                view_converter.get_formatted_batch(data, input_space))

    else:
        fprop = fprop_theano

    n_examples = len(dataset.file_list)
    target_space = dnn_model.get_output_space()
    data_specs = (CompositeSpace(
        (input_space, target_space)), ("songlevel-features", "targets"))
    iterator = dataset.iterator(mode='sequential',
                                batch_size=1,
                                data_specs=data_specs)

    aux_fname = None
    aux_writer = None
    if aux_model:
        aux_fname = open(aux_save_file, 'w')
        aux_writer = csv.writer(aux_fname, delimiter='\t')

    try:
        with open(save_file, 'w') as fname:
            dnn_writer = csv.writer(fname, delimiter='\t')
            for i, el in enumerate(iterator):

                # display progress indicator (previously a bare string
                # expression that was never written anywhere)
                sys.stdout.write('Progress: %2.0f%%\r' %
                                 (100 * i / float(n_examples)))
                sys.stdout.flush()

                if mode == 'all_same':
                    target = label
                elif mode == 'perfect':
                    target = el[1]
                elif mode == 'random':
                    target = np.random.randint(n_classes)
                else:  # 'all_wrong'
                    # remove ground truth label from set of options
                    cand = np.setdiff1d(np.arange(n_classes),
                                        np.array(el[1]))
                    target = cand[np.random.randint(len(cand))]

                # The features in el[0] are deliberately ignored: the audio is
                # re-read from disk (original note: "seems to be bug when
                # reading from h5").
                # NOTE(review): el[2] is used as the file name although
                # data_specs names only two sources - confirm the iterator
                # really yields it.
                f = el[2]
                if f.endswith('.wav'):
                    read_fun = audiolab.wavread
                elif f.endswith('.au'):
                    read_fun = audiolab.auread
                elif f.endswith('.mp3'):
                    read_fun = read_mp3
                else:
                    # previously fell through and reused the reader chosen
                    # for the preceding file (or raised a NameError)
                    raise ValueError(
                        'Unsupported audio file extension: {!r}'.format(f))

                x, fstmp, _ = read_fun(os.path.join(root_dir, f))

                # make mono
                if len(x.shape) != 1:
                    x = np.sum(x, axis=1) / 2.

                seglen = 30
                x = x[:fstmp * seglen]

                if fstmp != target_fs:
                    x = samplerate.resample(x, target_fs / float(fstmp),
                                            'sinc_best')

                Mag, Phs = compute_fft(x)
                # keep at most 1200 rows and 513 columns of the features
                Mag = Mag[:1200, :513]
                Phs = Phs[:1200, :513]
                epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.)

                X_adv, P_adv = find_adversary(
                    model=dnn_model,
                    X0=Mag,
                    label=target,
                    fwd_xform=fwd_xform,
                    back_xform=back_xform,
                    # append the negated, mirrored phase columns; `//` keeps
                    # the slice bound an integer on every Python version
                    P0=np.hstack(
                        (Phs, -Phs[:, -2:-dataset.nfft // 2 - 1:-1])),
                    mu=.01,
                    epsilon=epsilon,
                    maxits=50,
                    stop_thresh=0.5,
                    griffin_lim=False)

                if save_adversary_audio:
                    nfft = 2 * (X_adv.shape[1] - 1)
                    nhop = nfft // 2
                    x_adv = overlap_add(
                        np.hstack((X_adv, X_adv[:, -2:-nfft // 2 - 1:-1])) *
                        np.exp(1j * P_adv), nfft, nhop)
                    audiolab.wavwrite(
                        x_adv, os.path.join(save_adversary_audio, el[2]),
                        target_fs, 'pcm16')

                # file-level decision: class with the highest mean output
                fpass = fprop(X_adv)
                conf = np.sum(fpass, axis=0) / float(fpass.shape[0])
                dnn_label = np.argmax(conf)
                true_label = el[1]

                # truncate to correct length
                ext = min(Mag.shape[0], X_adv.shape[0])
                Mag = Mag[:ext, :]
                X_adv = X_adv[:ext, :]

                X_diff = Mag - X_adv
                out_snr = 20 * np.log10(
                    np.linalg.norm(Mag) / np.linalg.norm(X_diff))

                dnn_writer.writerow([
                    dataset.file_list[i], true_label, dnn_label, out_snr,
                    conf[dnn_label]
                ])

                print(
                    'Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'
                    .format(mode, true_label, target, dnn_label,
                            conf[dnn_label], out_snr))
                if aux_model:
                    fft_agg = aggregate_features(dnn_model, X_adv,
                                                 which_layers)
                    aux_vote = np.argmax(
                        np.bincount(
                            np.array(aux_model.predict(fft_agg),
                                     dtype='int')))
                    aux_writer.writerow(
                        [dataset.file_list[i], true_label, aux_vote])
                    print('AUX adversarial label: {}'.format(aux_vote))
    finally:
        # close the aux file even when processing a file raises
        if aux_fname is not None:
            aux_fname.close()
    print('')
예제 #3
0
        for t in targets:

            # search for adversary
            X_adv, P_adv = find_adversary(model=dnn_model,
                                          X0=X0,
                                          label=t,
                                          P0=Phs,
                                          mu=mu,
                                          epsilon=epsilon,
                                          maxits=maxits,
                                          stop_thresh=stop_thresh,
                                          griffin_lim=True)

            # get time-domain representation
            x_adv = overlap_add(
                np.hstack((X_adv, X_adv[:, -2:-nfft / 2 - 1:-1])) *
                np.exp(1j * P_adv))
            out_snr = 20 * np.log10(
                np.linalg.norm(x[nfft:-nfft]) /
                np.linalg.norm(x[nfft:-nfft] - x_adv[nfft:-nfft]))

            # BEFORE FILTERING
            # ===========================================
            # dnn prediction
            pred = np.argmax(np.sum(fprop(X_adv), axis=0))
            if pred == t:
                dnn_file.write('{}\t'.format(int(out_snr + .5)))
            else:
                dnn_file.write('{}\t'.format('na'))

            # aux prediction
예제 #4
0
def file_misclass_error_printf(dnn_model, root_dir, dataset, save_file, mode='all_same', label=0, snr=30, aux_model=None, aux_save_file=None, which_layers=None, save_adversary_audio=None, fwd_xform=None, back_xform=None):
    """
    Craft an adversarial example for every file in `dataset`, classify it at
    the file level and log the outcome.

    For each file the audio is re-read from `root_dir`, mixed down to mono,
    truncated to its first `30 * fstmp` samples (`fstmp` is the second value
    returned by the reader, presumably the sample rate), resampled to 22050 Hz
    if needed and converted to magnitude/phase features with `compute_fft`.
    `find_adversary` then perturbs the magnitudes towards a target label.  The
    file-level DNN label is the class with the highest frame-averaged network
    output.

    One tab-separated row `(file, true label, DNN label, output SNR in dB,
    confidence of the DNN label)` is written to `save_file` per file.  When
    `aux_model` is given, a row `(file, true label, aux label)` is also written
    to `aux_save_file`, where the aux label is the majority vote of
    `aux_model.predict` over the features from `aggregate_features`.

    Parameters
    ----------
    dnn_model : model providing `get_input_space`, `get_output_space`, `fprop`
    root_dir : directory that the file names yielded by the iterator are
        joined to
    dataset : object providing `targets`, `file_list`, `nfft` and `iterator`
    save_file : path of the tab-separated DNN result file (overwritten)
    mode : how the adversarial target label is chosen
        'all_same'  -> always `label`
        'perfect'   -> the true label of the file
        'random'    -> a random class index
        'all_wrong' -> a random class index different from the true label
    label : target label used when `mode == 'all_same'`
    snr : sets the step size passed to `find_adversary` as
        `norm(Mag) / n_frames / 10**(snr / 20.)`
    aux_model : optional second classifier with a `predict` method
    aux_save_file : path of the tab-separated aux result file (overwritten);
        required when `aux_model` is given
    which_layers : forwarded to `aggregate_features`
    save_adversary_audio : if truthy, directory the resynthesised adversarial
        audio is written to (16-bit PCM at 22050 Hz)
    fwd_xform, back_xform : transforms forwarded to `find_adversary`;
        `fwd_xform` is also applied to the windowed frames before the network
        when the model has a `Conv2DSpace` input.  Both default to identity.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values, or a file has an
        extension other than '.wav', '.au' or '.mp3'.
    """
    import sys  # local import: only used for the in-place progress indicator

    # Fail fast: an unknown mode used to leave `target` unbound (or stale
    # from the previous file) deep inside the loop.
    valid_modes = ('all_same', 'perfect', 'random', 'all_wrong')
    if mode not in valid_modes:
        raise ValueError('Unknown mode {!r}; expected one of {}'.format(mode, valid_modes))

    if fwd_xform is None:
        print('fwd_xform=None, using identity')
        fwd_xform = lambda X: X
    if back_xform is None:
        print('back_xform=None, using identity')
        back_xform = lambda X: X

    n_classes = len(dataset.targets)
    target_fs = 22050  # sample rate every file is brought to before analysis

    input_space = dnn_model.get_input_space()
    X = input_space.make_theano_batch()
    Y = dnn_model.fprop(X)
    fprop_theano = theano.function([X], Y)

    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))

        def fprop(batch):
            # Cut the frame sequence into windows of `tframes` frames, flatten
            # each window to one row and feed the batch of windows to the net.
            nframes = batch.shape[0]
            thop = 1.  # window-hop divisor; 1. gives non-overlapping windows
            # builtin int replaces the removed `np.int` alias (same type)
            sup = np.arange(0, nframes - tframes + 1, int(tframes / thop))

            data = np.vstack([np.reshape(batch[i:i + tframes, :], (tframes * dim,)) for i in sup])
            data = fwd_xform(data)

            return fprop_theano(view_converter.get_formatted_batch(data, input_space))

    else:
        fprop = fprop_theano

    n_examples = len(dataset.file_list)
    target_space = dnn_model.get_output_space()
    data_specs = (CompositeSpace((input_space, target_space)), ("songlevel-features", "targets"))
    iterator = dataset.iterator(mode='sequential', batch_size=1, data_specs=data_specs)

    aux_fname = None
    aux_writer = None
    if aux_model:
        aux_fname = open(aux_save_file, 'w')
        aux_writer = csv.writer(aux_fname, delimiter='\t')

    try:
        with open(save_file, 'w') as fname:
            dnn_writer = csv.writer(fname, delimiter='\t')
            for i, el in enumerate(iterator):

                # display progress indicator (previously a bare string
                # expression that was never written anywhere)
                sys.stdout.write('Progress: %2.0f%%\r' % (100 * i / float(n_examples)))
                sys.stdout.flush()

                if mode == 'all_same':
                    target = label
                elif mode == 'perfect':
                    target = el[1]
                elif mode == 'random':
                    target = np.random.randint(n_classes)
                else:  # 'all_wrong'
                    # remove ground truth label from set of options
                    cand = np.setdiff1d(np.arange(n_classes), np.array(el[1]))
                    target = cand[np.random.randint(len(cand))]

                # The features in el[0] are deliberately ignored: the audio is
                # re-read from disk (original note: "seems to be bug when
                # reading from h5").
                # NOTE(review): el[2] is used as the file name although
                # data_specs names only two sources - confirm the iterator
                # really yields it.
                f = el[2]
                if f.endswith('.wav'):
                    read_fun = audiolab.wavread
                elif f.endswith('.au'):
                    read_fun = audiolab.auread
                elif f.endswith('.mp3'):
                    read_fun = read_mp3
                else:
                    # previously fell through and reused the reader chosen
                    # for the preceding file (or raised a NameError)
                    raise ValueError('Unsupported audio file extension: {!r}'.format(f))

                x, fstmp, _ = read_fun(os.path.join(root_dir, f))

                # make mono
                if len(x.shape) != 1:
                    x = np.sum(x, axis=1) / 2.

                seglen = 30
                x = x[:fstmp * seglen]

                if fstmp != target_fs:
                    x = samplerate.resample(x, target_fs / float(fstmp), 'sinc_best')

                Mag, Phs = compute_fft(x)
                # keep at most 1200 rows and 513 columns of the features
                Mag = Mag[:1200, :513]
                Phs = Phs[:1200, :513]
                epsilon = np.linalg.norm(Mag) / Mag.shape[0] / 10**(snr / 20.)

                X_adv, P_adv = find_adversary(
                    model=dnn_model,
                    X0=Mag,
                    label=target,
                    fwd_xform=fwd_xform,
                    back_xform=back_xform,
                    # append the negated, mirrored phase columns; `//` keeps
                    # the slice bound an integer on every Python version
                    P0=np.hstack((Phs, -Phs[:, -2:-dataset.nfft // 2 - 1:-1])),
                    mu=.01,
                    epsilon=epsilon,
                    maxits=50,
                    stop_thresh=0.5,
                    griffin_lim=False)

                if save_adversary_audio:
                    nfft = 2 * (X_adv.shape[1] - 1)
                    nhop = nfft // 2
                    x_adv = overlap_add(np.hstack((X_adv, X_adv[:, -2:-nfft // 2 - 1:-1])) * np.exp(1j * P_adv), nfft, nhop)
                    audiolab.wavwrite(x_adv, os.path.join(save_adversary_audio, el[2]), target_fs, 'pcm16')

                # file-level decision: class with the highest mean output
                fpass = fprop(X_adv)
                conf = np.sum(fpass, axis=0) / float(fpass.shape[0])
                dnn_label = np.argmax(conf)
                true_label = el[1]

                # truncate to correct length
                ext = min(Mag.shape[0], X_adv.shape[0])
                Mag = Mag[:ext, :]
                X_adv = X_adv[:ext, :]

                X_diff = Mag - X_adv
                out_snr = 20 * np.log10(np.linalg.norm(Mag) / np.linalg.norm(X_diff))

                dnn_writer.writerow([dataset.file_list[i], true_label, dnn_label, out_snr, conf[dnn_label]])

                print('Mode:{}, True label:{}, Adv label:{}, Sel label:{}, Conf:{}, Out snr: {}'.format(mode, true_label, target, dnn_label, conf[dnn_label], out_snr))
                if aux_model:
                    fft_agg = aggregate_features(dnn_model, X_adv, which_layers)
                    aux_vote = np.argmax(np.bincount(np.array(aux_model.predict(fft_agg), dtype='int')))
                    aux_writer.writerow([dataset.file_list[i], true_label, aux_vote])
                    print('AUX adversarial label: {}'.format(aux_vote))
    finally:
        # close the aux file even when processing a file raises
        if aux_fname is not None:
            aux_fname.close()
    print('')