# Build the raw-waveform CNN feature extractor and the first MLP stage,
# then move both modules to the target device.

# Hyper-parameters for the CNN front-end.
CNN_arch = dict(
    input_dim=wlen,
    fs=fs,
    cnn_N_filt=cnn_N_filt,
    cnn_len_filt=cnn_len_filt,
    cnn_max_pool_len=cnn_max_pool_len,
    cnn_use_laynorm_inp=cnn_use_laynorm_inp,
    cnn_use_batchnorm_inp=cnn_use_batchnorm_inp,
    cnn_use_laynorm=cnn_use_laynorm,
    cnn_use_batchnorm=cnn_use_batchnorm,
    cnn_act=cnn_act,
    cnn_drop=cnn_drop,
)
CNN_net = CNN(CNN_arch)
CNN_net.to(device)

# First fully-connected stage; its input width is whatever the CNN emits.
DNN1_arch = dict(
    input_dim=CNN_net.out_dim,
    fc_lay=fc_lay,
    fc_drop=fc_drop,
    fc_use_batchnorm=fc_use_batchnorm,
    fc_use_laynorm=fc_use_laynorm,
    fc_use_laynorm_inp=fc_use_laynorm_inp,
    fc_use_batchnorm_inp=fc_use_batchnorm_inp,
    fc_act=fc_act,
)
DNN1_net = MLP(DNN1_arch)
DNN1_net.to(device)
# Feature extractor CNN CNN_arch = {'input_dim': wlen, 'fs': fs, 'cnn_N_filt': cnn_N_filt, 'cnn_len_filt': cnn_len_filt, 'cnn_max_pool_len':cnn_max_pool_len, 'cnn_use_laynorm_inp': cnn_use_laynorm_inp, 'cnn_use_batchnorm_inp': cnn_use_batchnorm_inp, 'cnn_use_laynorm':cnn_use_laynorm, 'cnn_use_batchnorm':cnn_use_batchnorm, 'cnn_act': cnn_act, 'cnn_drop':cnn_drop, } CNN_net=CNN(CNN_arch) CNN_net.cuda() # Loading label dictionary lab_dict=np.load(class_dict_file).item() print(CNN_net.out_dim) DNN1_arch = {'input_dim': CNN_net.out_dim, 'fc_lay': fc_lay, 'fc_drop': fc_drop, 'fc_use_batchnorm': fc_use_batchnorm, 'fc_use_laynorm': fc_use_laynorm, 'fc_use_laynorm_inp': fc_use_laynorm_inp, 'fc_use_batchnorm_inp':fc_use_batchnorm_inp, 'fc_act': fc_act,
# Build the raw-waveform CNN feature extractor, move it to the GPU, load the
# label dictionary, and declare the architecture of the first MLP stage.

# Feature extractor CNN.
CNN_arch = {
    'input_dim': wlen,
    'fs': fs,
    'cnn_N_filt': cnn_N_filt,
    'cnn_len_filt': cnn_len_filt,
    'cnn_max_pool_len': cnn_max_pool_len,
    'cnn_use_laynorm_inp': cnn_use_laynorm_inp,
    'cnn_use_batchnorm_inp': cnn_use_batchnorm_inp,
    'cnn_use_laynorm': cnn_use_laynorm,
    'cnn_use_batchnorm': cnn_use_batchnorm,
    'cnn_act': cnn_act,
    'cnn_drop': cnn_drop,
}
CNN_net = CNN(CNN_arch)
CNN_net.cuda()

# Loading label dictionary.
# allow_pickle=True is required: the .npy file stores a pickled Python dict,
# and NumPy >= 1.16.3 refuses to unpickle by default (raises ValueError).
lab_dict = np.load(class_dict_file, allow_pickle=True).item()

# First MLP stage; its input width is whatever the CNN emits.
DNN1_arch = {
    'input_dim': CNN_net.out_dim,
    'fc_lay': fc_lay,
    'fc_drop': fc_drop,
    'fc_use_batchnorm': fc_use_batchnorm,
    'fc_use_laynorm': fc_use_laynorm,
    'fc_use_laynorm_inp': fc_use_laynorm_inp,
    'fc_use_batchnorm_inp': fc_use_batchnorm_inp,
    'fc_act': fc_act,
}
# Feature extractor CNN(SincNet) CNN_arch = { 'input_dim': wlen, 'fs': fs, 'cnn_N_filt': cnn_N_filt, 'cnn_len_filt': cnn_len_filt, 'cnn_max_pool_len': cnn_max_pool_len, 'cnn_use_laynorm_inp': cnn_use_laynorm_inp, 'cnn_use_batchnorm_inp': cnn_use_batchnorm_inp, 'cnn_use_laynorm': cnn_use_laynorm, 'cnn_use_batchnorm': cnn_use_batchnorm, 'cnn_act': cnn_act, 'cnn_drop': cnn_drop, } CNN_net = CNN(CNN_arch) CNN_net.to(device) # 建立注意力机制 # print(CNN_net.out_dim) 6420 AttentionModule = DoubleMHA(CNN_net.out_dim, 20) # 8 16 32的头数 # 修改这里保证pooling中assert self.encoder_size % heads_number == 0 # d_model 可以通过 # Loading label dictionary lab_dict = np.load(class_dict_file, allow_pickle=True).item() # print(CNN_net.out_dim) 6420 DNN1_arch = { 'input_dim': 321, # CNN_net.out_dim where 321 means after attention machine dim 'fc_lay': fc_lay,
# Model-selection branch (the matching `if` is outside this view; the
# trailing `elif architecture == 'CNN_features':` body is also cut off).
else:
    # Both raw-waveform front-ends share the same CNN architecture dict.
    if architecture in ['SincNet_raw', 'CNN_raw']:
        CNN_arch = {'input_dim': wlen,
                    'fs': fs,
                    'cnn_N_filt': cnn_N_filt,
                    'cnn_len_filt': cnn_len_filt,
                    'cnn_max_pool_len': cnn_max_pool_len,
                    'cnn_use_laynorm_inp': cnn_use_laynorm_inp,
                    'cnn_use_batchnorm_inp': cnn_use_batchnorm_inp,
                    'cnn_use_laynorm': cnn_use_laynorm,
                    'cnn_use_batchnorm': cnn_use_batchnorm,
                    'cnn_act': cnn_act,
                    'cnn_drop': cnn_drop,
                    }
        # Pick the front-end implementation by architecture name.
        if architecture == 'SincNet_raw':
            CNN_net = SincNet(CNN_arch)
        else:
            CNN_net = ConvNet(CNN_arch)
        # MLP that consumes the front-end's output features.
        MLP_after = {'input_dim': CNN_net.out_dim,
                     'fc_lay': fc_lay,
                     'fc_drop': fc_drop,
                     'fc_use_batchnorm': fc_use_batchnorm,
                     'fc_use_laynorm': fc_use_laynorm,
                     'fc_use_laynorm_inp': fc_use_laynorm_inp,
                     'fc_use_batchnorm_inp': fc_use_batchnorm_inp,
                     'fc_act': fc_act,
                     }
        # NOTE(review): MLP_before is defined outside this view.
        model = FunTimesCNN(MLP_before, MLP_after, CNN_arch,
                            use_sinc_net=architecture == 'SincNet_raw')
    elif architecture == 'CNN_features':
# Evaluation body: restore a trained SincNet speaker model from a checkpoint
# and compute ROC statistics over randomly sampled test batches.

# Create the output folder if it does not exist.
# (Replaces the original `try: os.stat(...) except: os.mkdir(...)` — the bare
# except silently swallowed every error and the stat/mkdir pair was racy.)
os.makedirs(output_folder, exist_ok=True)

# Setting seed for reproducibility.
torch.manual_seed(seed)
np.random.seed(seed)

# Loading label dictionary (a pickled dict stored in a .npy file).
lab_dict = np.load(class_dict_file, allow_pickle=True).item()

# Restore the trained model.
# NOTE(review): CNN({}) / MLP({}) are constructed with empty arch dicts before
# the state dicts are loaded — presumably the constructors tolerate this;
# confirm against their definitions.
checkpoint = torch.load('exp/SincNet_TIMIT/model_raw')
CNN_net = CNN({})
CNN_net.load_state_dict(checkpoint['CNN_model_par'])
DNN1_net = MLP({})
DNN1_net.load_state_dict(checkpoint['DNN1_model_par'])
DNN2_net = MLP({})
DNN2_net.load_state_dict(checkpoint['DNN2_model_par'])

eer = 0
for i in range(N_batches):
    # Preprocess the test signals into a random batch.
    # TODO: data_folder is the test data set.
    [inp, lab] = create_batches_rnd(batch_size, data_folder, wav_lst_te, snt_te, wlen, lab_dict, 0.2)
    pout = DNN2_net(DNN1_net(CNN_net(inp)))
    # The arg-max over class posteriors is the predicted speaker id.
    pred = torch.max(pout, dim=1)[1]
    # NOTE(review): roc_curve is fed hard predictions rather than scores,
    # which yields a degenerate two-point ROC; consider passing the pout
    # probabilities for the positive class instead.
    fpr, tpr, thresholds = roc_curve(lab, pred, pos_label=1)