def __init__(
        self,
        o_audio_file,
        t_audio_file,
        save_p_fname,
        save_info_fname,
        MODE="TIMIT",
        dct_field=0.65,
    ):
        """Set up a targeted speaker-recognition attack instance.

        Loads the speaker model for *MODE*, reads the original and target
        audio files, predicts their labels and speaker names, trims both
        signals to a common length, peak-normalises them, and initialises
        the perturbation-search bookkeeping.

        Parameters:
            o_audio_file: path of the original (source) audio file.
            t_audio_file: path of the target-speaker audio file.
            save_p_fname: file name used to save the perturbation result.
            save_info_fname: file name used to save run information.
            MODE: dataset/model selector. Default fixed from "TIMIE" to
                "TIMIT" -- no other code in this project uses the "TIMIE"
                spelling, so the old default was an unusable typo.
            dct_field: fraction of the DCT spectrum used by the attack.
        """
        self.o_audio_file = o_audio_file
        self.t_audio_file = t_audio_file
        self.model = Sincnet.get_speaker_model(MODE)
        self.model = self.model.eval()
        self.speaker_label, self.label_speaker = Sincnet.get_speaker_label(
            MODE)
        self.save_p_fname = save_p_fname
        self.save_info_fname = save_info_fname

        # Orthonormal DCT/IDCT pair used to move between time and DCT domain.
        self.dct = lambda x: ffp.dct(x, norm='ortho')
        self.idct = lambda ix: ffp.idct(ix, norm='ortho')
        self.o_audio, self.sr = sf.read(o_audio_file)
        self.t_audio, self.sr = sf.read(t_audio_file)
        # Original and target labels plus the corresponding speaker names.
        self.o_label = self.predict_one_label(self.o_audio)
        self.t_label = self.predict_one_label(self.t_audio)
        self.o_name = self.get_name_by_label(self.o_label)
        self.t_name = self.get_name_by_label(self.t_label)
        # Trim both signals to the shorter one's length.
        self.audio_len = min(len(self.o_audio), len(self.t_audio))
        self.o_audio = self.o_audio[:self.audio_len]
        self.t_audio = self.t_audio[:self.audio_len]
        # Peak-normalise (inf-norm) to [-1, 1], then scale slightly below
        # full range to leave clipping headroom.
        self.o_audio /= np.linalg.norm(self.o_audio, np.inf)
        self.t_audio /= np.linalg.norm(self.t_audio, np.inf)
        self.o_audio *= 0.95
        self.t_audio *= 0.95
        self.o2_audio = None
        # Perturbation-search state.
        self.best_pretub_scale = float('inf')
        self.best_pretub = None
        self.best_clip_scale = float('inf')
        self.best_clip_perturb = None
        self.interval = [None, None]
        self.clip_perturb_len = None
        # Hyper-parameters.
        self.query_num = 0
        self.theta = 1e-4  # stop threshold for the binary search
        self.dct_field = dct_field
 def __init__(self,
              MODE,
              SAVE_DIR_PATH,
              query_num,
              interval,
              num_iterations=2000,
              gamma=1.0,
              stepsize_search='geometric_progression',
              max_num_evals=1e4,
              init_num_evals=100,
              query_limit=25000,
              verbose=True):
     """Store attack configuration and load the speaker model for *MODE*."""
     # Speaker-recognition model under attack, selected by dataset mode.
     self.model = Sincnet.get_speaker_model(MODE)
     # Output location and query-budget bookkeeping.
     self.SAVE_DIR_PATH = SAVE_DIR_PATH
     self.query_num = query_num
     self.query_limit = query_limit
     self.interval = interval
     # Search hyper-parameters.
     self.num_iterations = num_iterations
     self.gamma = gamma
     self.stepsize_search = stepsize_search
     self.max_num_evals = max_num_evals
     self.init_num_evals = init_num_evals
     self.verbose = verbose
     # Orthonormal DCT/IDCT pair for time <-> frequency conversion.
     self.dct = lambda signal: ffp.dct(signal, norm='ortho')
     self.idct = lambda coeffs: ffp.idct(coeffs, norm='ortho')
 def predict_one_label(self, data):
     """Query the model once and return the predicted speaker label."""
     # Every prediction counts against the query budget.
     self.query_num += 1
     wav = torch.from_numpy(data.squeeze()).float().cuda()
     label, _ = Sincnet.sentence_test(self.model, wav)
     return label
# Ejemplo n.º 4 (scraped-example separator; kept as a comment so the file parses)
 def __init__(self,
              MODE,
              SAVE_DIR_PATH,
              num_iterations=2000,
              gamma=1.0,
              stepsize_search='geometric_progression',
              max_num_evals=1e4,
              init_num_evals=100,
              query_limit=25000,
              verbose=True):
     """Store attack settings and load the speaker model for *MODE*."""
     # Model under attack, selected by dataset mode.
     self.model = Sincnet.get_speaker_model(MODE)
     # Output directory and query accounting (starts from zero here).
     self.SAVE_DIR_PATH = SAVE_DIR_PATH
     self.query_num = 0
     self.query_limit = query_limit
     # Search hyper-parameters.
     self.num_iterations = num_iterations
     self.gamma = gamma
     self.stepsize_search = stepsize_search
     self.max_num_evals = max_num_evals
     self.init_num_evals = init_num_evals
     self.verbose = verbose
# -- coding: utf-8 --
from utils import Sincnet
import torch
import numpy as np
import soundfile as sf
import librosa.display
import matplotlib.pyplot as plt
import pickle
import utils
import os
import LOCAL_ATT_HSJA_ATTACK

# Dataset/model selection flag; "Librispeech" is the alternative handled below.
MODE = "TIMIT"
# Working directory used as the root for the result folders created by mkd().
abs_path=os.getcwd()

# Pre-trained speaker-recognition model and the label <-> speaker-name maps.
model = Sincnet.get_speaker_model(MODE)
speaker_label, label_speaker = Sincnet.get_speaker_label(MODE)
def mkd(name):
    """Create result directory *name* (relative to abs_path) if missing.

    Fixes:
    - ``os.path.exist`` does not exist (raised AttributeError);
      the correct function is ``os.path.exists``.
    - The old code tested ``name`` but created ``os.path.join(abs_path, name)``;
      now the same joined path is checked and created.
    - ``os.makedirs(..., exist_ok=True)`` also handles nested paths such as
      r"lahresult\lib" whose parent may not exist yet.
    """
    os.makedirs(os.path.join(abs_path, name), exist_ok=True)
# Per-mode result/output directories.
# Raw strings throughout: the original non-raw "lahresult\lib" only worked
# because \l is not an escape sequence -- raw literals make that explicit.
if MODE=="Librispeech":
    save_dir = r"lahresult\lib"
    save_adv_dir = r"lahresult\libaudio"
    attackdir = r"AttackDataset\lib-attack-audio"
    targetdir = r"AttackDataset\lib-target-audio"
else:
    save_dir = r"lahresult\timit"
    save_adv_dir = r"lahresult\timitaudio"
    # NOTE(review): attackdir/targetdir are not set for TIMIT in this
    # fragment -- presumably assigned elsewhere; confirm against full file.
# Create both output directories in either mode; the original TIMIT branch
# never created save_adv_dir, which would break later writes into it.
mkd(save_dir)
mkd(save_adv_dir)
def show_max(wav):
    """Print the model's prediction for *wav* (debugging helper)."""
    prediction, _ = Sincnet.sentence_test_lib(model, wav.float().cuda())
    print(prediction)
# -*- coding: UTF-8 -*-
from utils import Sincnet
#from utils.sincet_for_lib import *
import torch
import numpy as np
import soundfile as sf
import os
import time
from LocalSearch.select_region_attack_timit import select_region

# Dataset/model selection flag for this script.
MODE = "Librispeech"
#model = get_speaker_model2(MODE)
# Pre-trained speaker model and the label <-> speaker-name maps.
model = Sincnet.get_speaker_model(MODE)
speaker_label, label_speaker = Sincnet.get_speaker_label(MODE)

def show_max(wav):
    """Debug helper: print the label the model predicts for *wav*."""
    top_label, _ = Sincnet.sentence_test_lib(model, wav.float().cuda())
    print(top_label)

# Attack (source) and target audio folders; files are named "<index>-...".
adir=r"F:\SR-ATK\用于画图sampleaudio\lib-attack-audio"
tdir=r"F:\SR-ATK\用于画图sampleaudio\lib-target-audio"
o_audio_files = os.listdir(adir)
# Sort by the numeric index prefix so source/target files pair up in order.
o_audio_files.sort(key=lambda item: int(item.split('-')[0]))
t_audio_files = os.listdir(tdir)
t_audio_files.sort(key=lambda item: int(item.split('-')[0]))

# Output text file for the ratio records.
save_txt_path=r"F:\SR-ATK\占比记录\newlib.txt"
record=[]#[non-target region ratio, attack region ratio, original initial perturbation size, updated initial perturbation size]
# Walk matched (attack, target) pairs.
# NOTE(review): the loop body only builds the joined paths and rebinds
# a/t each iteration -- the attack call that presumably follows appears
# truncated in this fragment; confirm against the full script.
for a,t in zip(o_audio_files,t_audio_files):
    a=os.path.join(adir,a)
    t=os.path.join(tdir,t)
def predict_one_label(model, audio):
    """Return the speaker label *model* predicts for *audio*."""
    predicted, _ = Sincnet.sentence_test_lib(model, audio.float().cuda())
    return predicted
# Ejemplo n.º 9 (scraped-example separator; kept as a comment so the file parses)
 def __init__(self, MODE, savepath, limit_count=25000):
     """Load the speaker model and initialise query bookkeeping."""
     # Number of model queries issued so far / hard cap on queries.
     self.querynum = 0
     self.limit_count = limit_count
     # Where results are saved.
     self.savepath = savepath
     # Speaker model selected by dataset mode.
     self.model = Sincnet.get_speaker_model(MODE)
 def predict_label(self,data):
     """Return the model's predicted speaker label for raw audio *data*."""
     wav = torch.from_numpy(data).float().cuda()
     label, _ = Sincnet.sentence_test(self.model, wav)
     return label