Example #1
def main():

    flags = parse_flags()
    hparams = parse_hparams(flags.hparams)
    num_classes = 41
    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values
    count = 0

    with tf.Graph().as_default():
        substitute = model.BaselineCNN(hparams, num_classes)
        audio_input = tf.placeholder(tf.float32,
                                     shape=[None],
                                     name='audio_input')
        # Record variable names before building the model so the Saver below
        # restores only the variables created from this point on.
        start_vars = set(x.name for x in tf.global_variables())
        features = inputs.compute_features(audio_input, hparams)
        preds = substitute.get_probs(features)
        end_vars = tf.global_variables()
        model_vars = [x for x in end_vars if x.name not in start_vars]
        saver = tf.train.Saver(var_list=model_vars)

        with tf.Session() as sess:
            saver.restore(sess=sess, save_path=flags.save_model_dir)

            for i in range(100):
                # Decode the clip to mono float32 PCM at 44.1 kHz with ffmpeg.
                call = [
                    'ffmpeg', '-v', 'quiet', '-i',
                    os.path.join(flags.infer_audio_dir, file_names[i]), '-f',
                    'f32le', '-ar',
                    str(44100), '-ac', '1', 'pipe:1'
                ]
                samples = subprocess.check_output(call)
                waveform = np.frombuffer(samples, dtype=np.float32)

                pr = sess.run([preds], feed_dict={audio_input: waveform})
                pr = np.squeeze(pr)
                # Average per-frame probabilities into one clip-level vector.
                if pr.ndim != 1:
                    pr = np.mean(pr, axis=0)
                #print(pr)
                #print(np.argmax(pr),np.max(pr))
                #print(df.iloc[i,1],utils_tf._convert_label_to_label_name(int(np.argmax(pr))))
                lab = utils_tf._convert_label_name_to_label(df.iloc[i, 1])
                if lab == np.argmax(pr):
                    count += 1
                    print(lab, np.argmax(pr), np.max(pr))
            # Only the first 100 clips are evaluated, so normalize by 100.
            print(count / 100)
    return
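The ffmpeg pipe above can be factored into a reusable helper. A minimal sketch, assuming ffmpeg is on the PATH; the helper name decode_audio is ours, not part of the original code:

import subprocess
import numpy as np

def decode_audio(path, sample_rate=44100):
    # Decode any audio file to mono float32 PCM at the given rate via ffmpeg.
    call = [
        'ffmpeg', '-v', 'quiet', '-i', path,
        '-f', 'f32le', '-ar', str(sample_rate), '-ac', '1', 'pipe:1'
    ]
    raw = subprocess.check_output(call)
    return np.frombuffer(raw, dtype=np.float32)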
Example #2
def target():
    """
    Label the data using a particular model and save the softmax values.
    Generates one softmax vector per file.
    """
    flags = parse_flags()
    hparams = parse_hparams(flags.hparams)
    num_classes = 41
    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values

    count = 0
    sr = 32000
    # Fit the feature scaler on the training set so inference uses the same
    # normalization statistics.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(flags.save_model_dir + '.meta', sr, generator,
                                mel_filt)
        saver = model.build_graph(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, flags.save_model_dir)
        print(len(file_names))
        for i in range(100):
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_names[i])
            probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            probs = np.squeeze(probs)
            if probs.ndim != 1:
                probs = np.mean(probs, axis=0)

            lab = utils_tf._convert_label_name_to_label(df.iloc[i, 1])
            if lab == np.argmax(probs):
                count += 1
                print(lab, np.argmax(probs))

            # Running accuracy over the 100 evaluated clips.
            print(count / 100)
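Every example repeats the same post-processing step: squeeze the sess.run output, then average frame-level probabilities into a single clip-level vector. A minimal sketch of that shared step as a standalone function (the helper name clip_probs is ours):

import numpy as np

def clip_probs(raw):
    # Collapse a sess.run result to one clip-level probability vector.
    probs = np.squeeze(raw)
    if probs.ndim != 1:
        # The model emitted per-frame predictions; average across frames.
        probs = np.mean(probs, axis=0)
    return probs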
Example #3
def deepfoolcochlear(audio_path,
                     metadata_path,
                     model_path,
                     exp_data_path,
                     adv_audio_path,
                     save_data=False):
    # Run the DeepFool attack to generate adversarial examples from manually
    # verified clips in the training and test data.
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000
    
    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []
    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32, shape=[None, None], name='input_audio')
        saver = model.build_graph(pcm)
        deepfool = DFM.DeepFool(model)
        deepfool.build_attack(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        # Note: only a single clip (index 1) is attacked in this example.
        for i in range(1, 2):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
                data = np.expand_dims(data, axis=0)
            except EOFError:
                print("EOF Error")
                continue

            labels = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})

            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)

            print('Ground truth:', labels)
            print('Original label number:', np.argmax(s))
            print('Original label confidence:', np.max(s))
                
            tic = time.process_time()
            adv = deepfool.attack(sess, data, 1)
            toc = time.process_time()

            print('Time for processing sample:', toc - tic, 'for iteration:', i)
            preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
            preds = np.squeeze(preds)

            if preds.ndim != 1:
                preds = np.mean(preds, axis=0)
            print('New label number:', np.argmax(preds))
            print('New label confidence:', np.max(preds))
            adv = np.squeeze(adv)
            if save_data:
                # librosa.output.write_wav was removed in librosa 0.8;
                # soundfile.write is the modern equivalent.
                librosa.output.write_wav(adv_audio_path + 'adv-' + audio_file_name,
                                         adv, sample_rate)
                
            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(np.argmax(s))
            original_confidence.append(np.max(s))
            new_label.append(np.argmax(preds))
            new_confidence.append(np.max(preds))
            new_o_label_conf.append(preds[np.argmax(s)])
            snr.append(10 * np.log10(np.mean(data**2) / np.mean((adv - data)**2)))
        if save_data:
            df_deepfool = pd.DataFrame({
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'new_orig_conf': new_o_label_conf,
                'SNR': snr
            })

            with open(exp_data_path, 'a') as f:
                df_deepfool.to_csv(f, header=False)
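The SNR these attack functions log is the ratio of signal power to perturbation power, expressed in decibels. A minimal sketch of the same computation as a standalone function (the name snr_db is ours):

import numpy as np

def snr_db(clean, adv):
    # Signal-to-noise ratio of the adversarial perturbation, in dB.
    signal_power = np.mean(clean**2)
    noise_power = np.mean((adv - clean)**2)
    return 10 * np.log10(signal_power / noise_power)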
Example #4
def lbfgstargeted(audio_path,
                  metadata_path,
                  model_path,
                  exp_data_path,
                  adv_audio_path,
                  save_data=False):
    # Run the targeted L-BFGS attack to generate adversarial examples from
    # manually verified clips in the training and test data.
    # Load the training dataset to fit the normalization scaler.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []
    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)
        lbfgs = LB.LBFGS(model, binary_search_steps=2, max_iterations=200)
        lbfgs.build_attack(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                continue

            gt_label = gt_labels[i]
            s = sess.run([model.get_probs()],
                         feed_dict={'input_audio:0': data})

            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)

            print('Original label number:', np.argmax(s), 'GT:', gt_label)
            print('Original label confidence:', np.max(s))

            # Attack each target class in turn, skipping the ground truth.
            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                if label == gt_label:
                    continue

                tic = time.process_time()
                adv = lbfgs.attack(sess, data, np.repeat(label, int(q)))

                toc = time.process_time()

                print('Time for processing sample:', toc - tic,
                      'for iteration:', i)
                preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
                preds = np.squeeze(preds)

                if preds.ndim != 1:
                    preds = np.mean(preds, axis=0)
                print('New label number:', np.argmax(preds))
                print('New label confidence:', np.max(preds))

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(np.argmax(s))
                original_confidence.append(np.max(s))
                new_label.append(np.argmax(preds))
                new_confidence.append(np.max(preds))
                new_o_label_conf.append(preds[np.argmax(s)])
                snr.append(
                    10 *
                    np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))
        if save_data:
            df_lbfgs = pd.DataFrame({
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'new_orig_conf': new_o_label_conf,
                'SNR': snr
            })

            with open(exp_data_path, 'a') as f:
                df_lbfgs.to_csv(f, header=False)
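A portability note: librosa.output.write_wav, used by these attack functions to save adversarial audio, was removed in librosa 0.8. On newer installs the soundfile package provides an equivalent; a drop-in sketch:

import soundfile as sf

def write_wav(path, audio, sample_rate):
    # Replacement for the removed librosa.output.write_wav.
    sf.write(path, audio, sample_rate)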
Example #5
def inferencecochlear(audio_path,
                      metadata_path,
                      model_path,
                      exp_data_path,
                      adv_audio_path,
                      save_data=False):
    # Run inference with the Cochlear model on manually verified examples and
    # record the predicted labels and confidences.
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values
    sample_rate = 32000

    audio_name = []
    inferred_label = []
    inferred_confidence = []
    ground_truth = []
    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32,
                             shape=[None, None],
                             name='input_audio')
        saver = model.build_graph(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
                data = np.expand_dims(data, axis=0)
            except EOFError:
                print("EOF Error")
                continue

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()],
                         feed_dict={'input_audio:0': data})

            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)

            count_tot += 1

            if gt_label == np.argmax(s):
                count += 1

            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))

            audio_name.append(audio_file_name)
            inferred_label.append(np.argmax(s))
            inferred_confidence.append(np.max(s))
            ground_truth.append(gt_label)
        if save_data:
            df_infer = pd.DataFrame({
                'audio_name': audio_name,
                'ground_truth': ground_truth,
                'inferred_label': inferred_label,
                'inferred_confidence': inferred_confidence
            })

            with open(exp_data_path, 'w') as f:
                df_infer.to_csv(f, header=False)
Example #6
def inferenceiqbal(audio_path,
                   metadata_path,
                   model_path,
                   exp_data_path,
                   adv_audio_path,
                   save_data=False):
    # Run inference with the CleverHans-wrapped model on manually verified
    # examples and record the predicted labels and confidences.
    # Load the training dataset to fit the normalization scaler.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values

    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    audio_name = []
    ground_truth = []
    inferred_label = []
    inferred_confidence = []
    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                continue

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()],
                         feed_dict={'input_audio:0': data})

            s = np.squeeze(s)
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            label = np.argmax(s)
            count_tot += 1
            if label == gt_label:
                count += 1

            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))
            audio_name.append(audio_file_name)
            ground_truth.append(gt_label)
            inferred_label.append(label)
            inferred_confidence.append(np.max(s))
        if save_data:
            df_infer = pd.DataFrame({
                'audio_name': audio_name,
                'ground_truth': ground_truth,
                'inferred_label': inferred_label,
                'inferred_confidence': inferred_confidence
            })

            with open(exp_data_path, 'w') as f:
                df_infer.to_csv(f, header=False)
Example #7
def carliniwagneruntargetedcochlear(audio_path,
                                    metadata_path,
                                    model_path,
                                    exp_data_path,
                                    adv_audio_path,
                                    save_data=False):
    # Run the untargeted Carlini-Wagner attack to generate adversarial examples
    # from manually verified clips in the training and test data.
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    sample_rate = 32000

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []
    with tf.Graph().as_default() as graph:
        model = CochlearModel(model_path + '.meta')
        pcm = tf.placeholder(tf.float32,
                             shape=[None, None],
                             name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               initial_const=1e-2,
                                               max_iterations=1000,
                                               confidence=500,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
                data = np.expand_dims(data, axis=0)
            except EOFError:
                print("EOF Error")
                continue

            label = utils_tf._convert_label_name_to_label(label_names[i])

            print('Ground truth label:', label_names[i], label)

            tic = time.process_time()
            adv, o_label, o_conf, n_label, n_conf, n_conf_gt = carliniwagner.attack(
                sess, data, label, label, 1, prob_thresh=0.0244)
            toc = time.process_time()

            print('Time for iteration:', i, 'is', toc - tic)
            adv = np.squeeze(adv)
            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv,
                    sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(o_label)
            original_confidence.append(o_conf)
            new_label.append(n_label)
            new_confidence.append(n_conf)
            new_o_label_conf.append(n_conf_gt)
            snr.append(10 *
                       np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))
        if save_data:
            df_cw = pd.DataFrame({
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'new_orig_conf': new_o_label_conf,
                'SNR': snr
            })

            with open(exp_data_path, 'w') as f:
                df_cw.to_csv(f, header=False)
Example #8
def carliniwagnertargeted(audio_path,
                          metadata_path,
                          model_path,
                          exp_data_path,
                          adv_audio_path,
                          save_data=False):
    # Run the targeted Carlini-Wagner attack to generate adversarial examples
    # from manually verified clips in the training and test data.
    # Load the training dataset to fit the normalization scaler.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []
    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               confidence=500,
                                               targeted=True,
                                               max_iterations=1000,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i in range(df.shape[0]):
            audio_file_name = file_names[i]
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                continue

            gt_label = gt_labels[i]

            print('Ground truth label:', gt_label, 'Audio_file:',
                  file_names[i])
            # Attack each target class in turn, skipping the ground truth.
            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                if label == gt_label:
                    continue

                adv, o_label, o_conf, n_label, n_conf, n_gt_conf = carliniwagner.attack(
                    sess,
                    data,
                    label,
                    np.repeat(label, int(q)),
                    int(q),
                    prob_thresh=0.975)

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(o_label)
                original_confidence.append(o_conf)
                new_label.append(n_label)
                new_confidence.append(n_conf)
                new_o_label_conf.append(n_gt_conf)

                # Note the parentheses: average the squared perturbation,
                # not the square of the averaged perturbation.
                snr.append(
                    10 * np.log10(np.mean(data**2) / np.mean((adv - data)**2)))
        if save_data:
            df_cw = pd.DataFrame(
                {
                    'audio_name': audio_name,
                    'audio_length': audio_length,
                    'original_label': original_label,
                    'original_confidence': original_confidence,
                    'new_label': new_label,
                    'new_confidence': new_confidence,
                    'SNR': snr
                },
                columns=[
                    'audio_name', 'audio_length', 'original_label',
                    'original_confidence', 'new_label', 'new_confidence', 'SNR'
                ])

            with open(exp_data_path, 'a') as f:
                # Append without a header so repeated runs do not interleave
                # header rows into the data.
                df_cw.to_csv(f, header=False)
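Because these result files are appended without headers, reading them back requires supplying column names (pandas also writes the DataFrame index as the first column by default). A hedged sketch, with the file name results.csv standing in for exp_data_path:

import pandas as pd

cols = ['idx', 'audio_name', 'audio_length', 'original_label',
        'original_confidence', 'new_label', 'new_confidence', 'SNR']
df = pd.read_csv('results.csv', header=None, names=cols)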
Example #9
def main():
    flags = parse_flags()
    hparams = parse_hparams(flags.hparams)
    print(hparams)
    num_classes = 41
    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values
    labels = df.iloc[:, 1].values
    
    gt = []
    orig_label = []
    adv_label = []
    orig_pred = []
    adv_pred = []
    snr = []
    files = []
    t = []
    # A comma is needed to enter both context managers; chaining them with
    # `and` would evaluate as_default() without ever entering the graph context.
    with tf.Graph().as_default(), tf.Session() as sess:
        if hparams.vgg13_features:
            substitute_model = model.vgg13(hparams, num_classes)
        else:
            substitute_model = model.BaselineCNN(hparams, num_classes)
        cw = CW.CarliniWagnerAttack(model=substitute_model,
                                    save_model_dir=flags.save_model_dir,
                                    sess=sess,
                                    hparams=hparams)
        cw.build_attack()

        for i in range(len(file_names)):
            start_time = time.time()
            #call = ['ffmpeg','-v','quiet','-i',os.path.join(flags.infer_audio_dir,file_names[i]),'-f','f32le', '-ar',str(hparams.sample_rate),'-ac','1','pipe:1']
            #samples = subprocess.check_output(call)
            #data = np.frombuffer(samples, dtype=np.float32)
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_names[i])
            lab = utils_tf._convert_label_name_to_label(labels[i])
            print(data.shape)
            
            if hparams.targeted:
                # Draw a random target class different from the ground truth.
                target_label = np.random.randint(41)
                while target_label == lab:
                    target_label = np.random.randint(41)
            else:
                target_label = lab
            
            audio, pred, pred_orig, noise = cw.attack(data, target_label)
            
            if audio is None:
                # The attack failed to find an adversarial example; skip the clip.
                continue
            print('TIME IN SECONDS!', time.time() - start_time)
            gt.append(lab)
            orig_label.append(np.argmax(pred_orig))
            orig_pred.append(np.max(pred_orig))
            adv_label.append(np.argmax(pred))
            adv_pred.append(np.max(pred))
            snr.append(noise)
            files.append(file_names[i])
            t.append(time.time() - start_time)
            wav.write(os.path.join(flags.write_audio_dir, file_names[i]), 44100,
                      audio)
        
        df_out = pd.DataFrame({
            'fname': files,
            'gt': gt,
            'original_label': orig_label,
            'original_pred': orig_pred,
            'adv_label': adv_label,
            'adv_pred': adv_pred,
            'snr': snr,
            'time': t
        })
        df_out.to_csv('adv_data_vgg13.csv', index=False)
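The rejection loop above for choosing a random target class can also be written without retries. A minimal sketch of the equivalent selection (the helper name random_target is ours):

import numpy as np

def random_target(true_label, num_classes=41):
    # Draw a target class uniformly from all classes except the true one.
    candidates = np.setdiff1d(np.arange(num_classes), [true_label])
    return np.random.choice(candidates)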