def __init__(
        self,
        o_audio_file,
        t_audio_file,
        save_p_fname,
        save_info_fname,
        MODE="TIMIT",
        dct_field=0.65,
):
    self.o_audio_file = o_audio_file
    self.t_audio_file = t_audio_file
    self.model = Sincnet.get_speaker_model(MODE)
    self.model = self.model.eval()
    self.speaker_label, self.label_speaker = Sincnet.get_speaker_label(MODE)
    self.save_p_fname = save_p_fname
    self.save_info_fname = save_info_fname
    self.dct = lambda x: ffp.dct(x, norm='ortho')
    self.idct = lambda ix: ffp.idct(ix, norm='ortho')
    self.o_audio, self.sr = sf.read(o_audio_file)
    self.t_audio, self.sr = sf.read(t_audio_file)
    # Original label, target label, and the corresponding speaker names
    self.o_label = self.predict_one_label(self.o_audio)
    self.t_label = self.predict_one_label(self.t_audio)
    self.o_name = self.get_name_by_label(self.o_label)
    self.t_name = self.get_name_by_label(self.t_label)
    # Initialize the audio: truncate both clips to the shorter length
    self.audio_len = min(len(self.o_audio), len(self.t_audio))
    self.o_audio = self.o_audio[:self.audio_len]
    self.t_audio = self.t_audio[:self.audio_len]
    self.o_audio /= np.linalg.norm(self.o_audio, np.inf)
    self.t_audio /= np.linalg.norm(self.t_audio, np.inf)
    # Scale the peak-normalized audio down slightly to leave headroom
    self.o_audio *= 0.95
    self.t_audio *= 0.95
    self.o2_audio = None
    # Perturbation-related state
    self.best_pretub_scale = float('inf')
    self.best_pretub = None
    self.best_clip_scale = float('inf')
    self.best_clip_perturb = None
    self.interval = [None, None]
    self.clip_perturb_len = None
    # Hyperparameters
    self.query_num = 0
    self.theta = 1e-4  # binary-search stopping threshold
    self.dct_field = dct_field
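
# A minimal sketch of the DCT round-trip set up above, assuming `ffp` is
# scipy.fftpack and that `dct_field` keeps only the lowest fraction of DCT
# coefficients. `low_freq_mask` is a hypothetical helper, not part of the
# original class.
import numpy as np
import scipy.fftpack as ffp

def low_freq_mask(audio, dct_field=0.65):
    coeffs = ffp.dct(audio, norm='ortho')    # orthonormal DCT-II
    cutoff = int(len(coeffs) * dct_field)    # keep the low-frequency band
    coeffs[cutoff:] = 0.0
    return ffp.idct(coeffs, norm='ortho')    # inverse DCT back to the time domain

# Example: restrict a random 1-second clip (16 kHz) to its low-frequency band.
x = np.random.randn(16000)
x_low = low_freq_mask(x)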
def __init__(self, MODE, SAVE_DIR_PATH, query_num, interval,
             num_iterations=2000, gamma=1.0,
             stepsize_search='geometric_progression',
             max_num_evals=1e4, init_num_evals=100,
             query_limit=25000, verbose=True):
    self.model = Sincnet.get_speaker_model(MODE)
    self.num_iterations = num_iterations
    self.gamma = gamma                      # scales the binary-search threshold
    self.stepsize_search = stepsize_search
    self.max_num_evals = max_num_evals      # cap on gradient-estimation queries per step
    self.init_num_evals = init_num_evals
    self.verbose = verbose
    self.SAVE_DIR_PATH = SAVE_DIR_PATH
    self.query_num = query_num
    self.query_limit = query_limit
    self.interval = interval
    self.dct = lambda x: ffp.dct(x, norm='ortho')
    self.idct = lambda ix: ffp.idct(ix, norm='ortho')
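
# Sketch of the geometric-progression step-size search that
# `stepsize_search='geometric_progression'` names (as in HopSkipJumpAttack):
# start from dist / sqrt(iteration) and halve the step until the perturbed
# point is adversarial again. `is_adversarial` is a hypothetical decision
# oracle; the real class queries the SincNet speaker model instead.
import numpy as np

def geometric_progression_step(x, update, dist, iteration, is_adversarial):
    epsilon = dist / np.sqrt(iteration)   # initial step size
    while not is_adversarial(x + epsilon * update):
        epsilon /= 2.0                    # shrink until the step stays adversarial
    return x + epsilon * update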
def predict_one_label(self, data):
    # One decision query: count it and return only the predicted label
    self.query_num += 1
    data = data.squeeze()
    pred_real, pred_pro = Sincnet.sentence_test(
        self.model, torch.from_numpy(data).float().cuda())
    return pred_real
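
# A minimal sketch of how `self.theta` could terminate a binary search
# between the target and original clips; the class's actual search routine
# is not shown here, so this helper and its behavior are assumptions.
def binary_search_mix(attack, lo=0.0, hi=1.0):
    # Find the largest mixing weight toward the original audio for which
    # the mix is still classified as the target speaker.
    while hi - lo > attack.theta:
        mid = (lo + hi) / 2.0
        mixed = (1.0 - mid) * attack.t_audio + mid * attack.o_audio
        if attack.predict_one_label(mixed) == attack.t_label:
            lo = mid    # still the target label: push further toward the original
        else:
            hi = mid
    return lo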
def __init__(self, MODE, SAVE_DIR_PATH,
             num_iterations=2000, gamma=1.0,
             stepsize_search='geometric_progression',
             max_num_evals=1e4, init_num_evals=100,
             query_limit=25000, verbose=True):
    self.model = Sincnet.get_speaker_model(MODE)
    self.num_iterations = num_iterations
    self.gamma = gamma
    self.stepsize_search = stepsize_search
    self.max_num_evals = max_num_evals
    self.init_num_evals = init_num_evals
    self.verbose = verbose
    self.SAVE_DIR_PATH = SAVE_DIR_PATH
    self.query_num = 0
    self.query_limit = query_limit
# -*- coding: utf-8 -*-
from utils import Sincnet
import torch
import numpy as np
import soundfile as sf
import librosa.display
import matplotlib.pyplot as plt
import pickle
import utils
import os
import LOCAL_ATT_HSJA_ATTACK

MODE = "TIMIT"
abs_path = os.getcwd()
model = Sincnet.get_speaker_model(MODE)
speaker_label, label_speaker = Sincnet.get_speaker_label(MODE)

def mkd(name):
    # Create the directory under the working directory if it does not exist
    if not os.path.exists(name):
        os.mkdir(os.path.join(abs_path, name))

if MODE == "Librispeech":
    save_dir = r"lahresult\lib"
    save_adv_dir = r"lahresult\libaudio"
    mkd(save_dir)
    mkd(save_adv_dir)
    attackdir = r"AttackDataset\lib-attack-audio"
    targetdir = r"AttackDataset\lib-target-audio"
else:
    save_dir = r"lahresult\timit"
    save_adv_dir = r"lahresult\timitaudio"
    mkd(save_dir)
def show_max(wav):
    # Print the label predicted by the speaker model for a waveform
    qq, _ = Sincnet.sentence_test_lib(model, wav.float().cuda())
    print(qq)
# -*- coding: UTF-8 -*-
from utils import Sincnet
# from utils.sincet_for_lib import *
import torch
import numpy as np
import soundfile as sf
import os
import time
from LocalSearch.select_region_attack_timit import select_region

MODE = "Librispeech"
# model = get_speaker_model2(MODE)
model = Sincnet.get_speaker_model(MODE)
speaker_label, label_speaker = Sincnet.get_speaker_label(MODE)

def show_max(wav):
    # Print the label predicted by the speaker model for a waveform
    qq, _ = Sincnet.sentence_test_lib(model, wav.float().cuda())
    print(qq)

adir = r"F:\SR-ATK\用于画图sampleaudio\lib-attack-audio"
tdir = r"F:\SR-ATK\用于画图sampleaudio\lib-target-audio"
o_audio_files = os.listdir(adir)
o_audio_files.sort(key=lambda item: int(item.split('-')[0]))
t_audio_files = os.listdir(tdir)
t_audio_files.sort(key=lambda item: int(item.split('-')[0]))
save_txt_path = r"F:\SR-ATK\占比记录\newlib.txt"
# Each record: [non-target-class region ratio, attacked-region ratio,
#               original initial perturbation size, updated initial perturbation size]
record = []
for a, t in zip(o_audio_files, t_audio_files):
    a = os.path.join(adir, a)
    t = os.path.join(tdir, t)
def predict_one_label(model, audio):
    qq, _ = Sincnet.sentence_test_lib(model, audio.float().cuda())
    return qq
def __init__(self, MODE, savepath, limit_count=25000):
    self.querynum = 0
    self.model = Sincnet.get_speaker_model(MODE)
    self.savepath = savepath
    self.limit_count = limit_count
def predict_label(self, data):
    pred_real, pred_pro = Sincnet.sentence_test(
        self.model, torch.from_numpy(data).float().cuda())
    return pred_real
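
# Assumption: queries should count against the `limit_count` budget set in
# __init__, since `predict_label` itself never touches `querynum`. A
# hypothetical counted wrapper could look like this:
def predict_label_counted(self, data):
    self.querynum += 1
    if self.querynum > self.limit_count:
        raise RuntimeError("query budget exhausted")
    return self.predict_label(data)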