def detect(args):
    # Pick which checkpoint(s) to evaluate from the training history that
    # train() dumped to src/log.py.
    import log
    val_loss = log.history["val_loss"]
    val_acc = log.history["val_Acc"]

    choose = []
    for i in range(1):  # top-1 epoch; raise the range bound to fuse more epochs
        # Alternative: select by minimum validation loss instead:
        #   min_loss = np.argmin(val_loss)
        #   choose.append(min_loss)
        #   val_loss[min_loss] = np.inf
        max_acc = np.argmax(val_acc)
        choose.append(max_acc)
        val_acc[max_acc] = 0

    (te_x, te_y, te_na_list) = load_hdf5(args.te_hdf5_path, verbose=1)
    x = te_x
    y = te_y
    na_list = te_na_list
    x = do_scale(x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    for epoch in choose:
        t1 = time.time()
        # Note: if ModelCheckpoint numbers epochs from 1, the 0-based argmax
        # index may be off by one relative to the checkpoint file names.
        [model_path] = glob.glob(os.path.join(args.model_dir,
                                              "*-%04d-*hdf5" % epoch))
        model = load_model(model_path,
                           custom_objects={'focal_loss_fixed': focal_loss(),
                                           'Acc': myacc()})
        print("Loaded model: %s" % model_path)

        # Audio tagging
        pred = model.predict(x)
        fusion_at_list.append(pred)
        print("Prediction time: %s" % (time.time() - t1,))

    # Average AT probabilities over the chosen epochs and plot them
    fusion_at = np.mean(np.array(fusion_at_list), axis=0)
    print("AT shape: %s" % (fusion_at.shape,))

    if os.path.exists("result"):
        shutil.rmtree("result")
    for audio_ind in range(fusion_at.shape[0]):
        picture_path = os.path.join("result", "picture",
                                    na_list[audio_ind].replace("wav", "jpg"))
        my_plot(fusion_at[audio_ind, ...], y[audio_ind, ...], picture_path,
                threshold=cfg.threshold)
    print("Prediction finished!")
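# NOTE: detect() calls my_plot(), which is defined elsewhere in this repo.
# The sketch below is only an assumption of what such a helper could look
# like (name, arguments and behaviour are inferred from the call site, not
# taken from the source): it draws predicted probabilities against the
# ground truth with the decision threshold and saves the figure.
def my_plot(pred, gt, picture_path, threshold=0.5):     # hedged sketch
    import os
    import matplotlib
    matplotlib.use('Agg')                # render without a display
    import matplotlib.pyplot as plt

    if not os.path.exists(os.path.dirname(picture_path)):
        os.makedirs(os.path.dirname(picture_path))
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(pred, label='prediction')            # one line per class
    ax.plot(gt, linestyle=':', label='ground truth')
    ax.axhline(threshold, color='r', linestyle='--', label='threshold')
    ax.set_xlabel('frame')
    ax.set_ylabel('probability')
    ax.legend()
    fig.savefig(picture_path)
    plt.close(fig)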
def recognize(args, at_bool, sed_bool):
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = te_x
    y = te_y
    na_list = te_na_list
    x = do_scale(x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []

    # Evaluate a single fixed checkpoint. To fuse several epochs instead,
    # glob one checkpoint per epoch, e.g.:
    #   paths = [glob.glob(os.path.join(args.model_dir,
    #                                   "*.%02d-0.*.hdf5" % e))[0]
    #            for e in range(20, 30)]
    model_p = "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr.06-0.8333.hdf5"
    model_paths = glob.glob(os.path.join(args.model_dir, model_p))

    for model_path in model_paths:
        t1 = time.time()
        model = load_model(model_path)

        # Audio tagging
        if at_bool:
            pred = model.predict(x)
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities read from the
        # localization layer (see the run_func sketch below)
        if sed_bool:
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (averaged over the evaluated checkpoints)
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilities
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape: %s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
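# NOTE: recognize() calls run_func(), defined elsewhere in this repo. A
# minimal sketch consistent with its usage above: push x through the Keras
# backend function in mini-batches and concatenate the per-batch outputs.
# This is an assumption, not the repo's actual implementation.
def run_func(func, x, batch_size):      # hedged sketch
    import numpy as np
    outputs = []
    n_batches = int(np.ceil(len(x) / float(batch_size)))
    for i in range(n_batches):
        batch_x = x[i * batch_size: (i + 1) * batch_size]
        [batch_out] = func([batch_x, 0.])   # 0. = test-time learning phase
        outputs.append(batch_out)
    return np.concatenate(outputs, axis=0)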
def train(args):
    if os.path.exists(args.out_model_dir):
        shutil.rmtree(args.out_model_dir)
    create_folder(args.out_model_dir)
    num_classes = cfg.num_classes

    # Load training & testing data
    (tr_x, tr_y, tr_na_list) = load_hdf5(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5(args.te_hdf5_path, verbose=1)
    print("")

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model. Earlier experiments used Conv_BN / block_a stacks here;
    # this version uses block_b followed by block_c.
    (_, n_time, n_freq) = tr_x.shape
    input = Input(shape=(n_time, n_freq), name='input_layer')
    input_ = Reshape((n_time, n_freq, 1))(input)

    block1 = block_b(input_, 8)
    block1 = block_b(block1, 32)
    block1 = block_b(block1, 64)
    block1 = MaxPooling2D(pool_size=(1, 2))(block1)

    block2 = block_c(block1, 64)
    block2 = MaxPooling2D(pool_size=(1, 2))(block2)

    block3 = block_c(block2, 64)
    block3 = MaxPooling2D(pool_size=(1, 2))(block3)

    block4 = block_c(block3, 64)
    block4 = MaxPooling2D(pool_size=(1, 2))(block4)

    cnnout = Conv_BN(block4, 128, (1, 1), act="relu", bias=True)
    cnnout = MaxPooling2D(pool_size=(1, 2))(cnnout)
    cnnout = Reshape((240, 256))(cnnout)

    rnn = Bidirectional(
        GRU(128, activation='relu', return_sequences=True,
            kernel_regularizer=regularizers.l2(0.01),
            recurrent_regularizer=regularizers.l2(0.01)))(cnnout)
    out = TimeDistributed(
        Dense(num_classes, activation='softmax',
              kernel_regularizer=regularizers.l2(0.0)),
        name='output_layer')(rnn)

    model = Model(input, out)
    model.summary()

    # Compile model (focal_loss and myacc sketches follow this function)
    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.009)
    model.compile(loss=focal_loss(alpha=[1, 1, 1, 1], gamma=1),
                  optimizer=adam,
                  metrics=[myacc(threshold=0.5)])

    # Save model callback
    filepath = os.path.join(
        args.out_model_dir,
        "aed-batchsize_50-lr_0.001-{epoch:04d}-{val_Acc:.4f}.hdf5")
    save_model = ModelCheckpoint(filepath=filepath, monitor='val_Acc',
                                 verbose=0, save_best_only=False,
                                 save_weights_only=False, mode='auto',
                                 period=1)

    # Train with the data generator (plain model.fit on (tr_x, tr_y) with
    # batch_size=50 is the non-generator alternative)
    gen = Generator(batch_size=50, type='train')
    history = model.fit_generator(
        generator=gen.generate([tr_x], [tr_y]),
        steps_per_epoch=300,   # 300 iterations are treated as one 'epoch'
        epochs=100,            # maximum number of 'epochs' to train
        verbose=1,
        class_weight="auto",
        callbacks=[save_model],
        validation_data=(te_x, te_y))

    # Dump the training history; detect() imports it as the `log` module.
    with open('src/log.py', 'w') as f:
        f.write("history=")
        f.write(str(history.history))
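# NOTE: train() compiles with focal_loss() and myacc(), and detect() reloads
# checkpoints with custom_objects {'focal_loss_fixed': ..., 'Acc': ...}. Both
# are defined elsewhere in this repo; the versions below are hedged sketches
# that merely match those names and call signatures.
from keras import backend as K

def focal_loss(alpha=None, gamma=1):    # hedged sketch
    def focal_loss_fixed(y_true, y_pred):
        # Focal loss: down-weight the contribution of well-classified frames.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        a = 1. if alpha is None else K.constant(alpha)
        return K.mean(K.sum(-a * y_true * K.pow(1. - y_pred, gamma)
                            * K.log(y_pred), axis=-1))
    return focal_loss_fixed

def myacc(threshold=0.5):               # hedged sketch
    def Acc(y_true, y_pred):
        # Element-wise accuracy after thresholding per-class probabilities;
        # the function name 'Acc' is what Keras logs as val_Acc.
        y_bin = K.cast(K.greater(y_pred, threshold), K.floatx())
        return K.mean(K.cast(K.equal(y_true, y_bin), K.floatx()))
    return Acc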
def train(args):
    num_classes = cfg.num_classes

    # Load training & testing data
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)   # (N, 240, 64)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)   # (N, 240, 64)

    # Build model (hedged sketches of outfunc and block() appear later in
    # this file)
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')  # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)                # (N, 240, 64, 1)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 32, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 16, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 8, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 4, 128)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu",
                use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)     # (N, 240, 1, 256)

    a1 = Reshape((240, 256))(a1)                # (N, 240, 256)

    # Gated BGRU
    rnnout = Bidirectional(GRU(128, activation='linear',
                               return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid',
                                    return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'),
                          name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    # Compile model. metrics=['accuracy'] resolves to binary accuracy for
    # this multi-label output; 'categorical_accuracy' is the alternative if
    # the label format requires it.
    adam_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                           epsilon=None, decay=0.0,
                                           amsgrad=False)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam_optimizer,
                  metrics=['accuracy'])

    # Save model callback. ModelCheckpoint substitutes {epoch}, writing one
    # file per epoch with the names recognize() expects.
    filepath = os.path.join(args.out_model_dir,
                            "{0}_{{epoch}}.hdf5".format(args.model_name))
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath, monitor='val_acc',
                                 verbose=0, save_best_only=False,
                                 save_weights_only=False, mode='auto',
                                 period=1)

    # Train
    t_train = time.time()
    use_generator = False
    if use_generator:
        gen = RatioDataGenerator(batch_size=args.batch_size, type='train')
        model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                            steps_per_epoch=args.steps_p_epoch,
                            epochs=args.epochs,   # maximum 'epochs' to train
                            verbose=1,
                            callbacks=[save_model],
                            validation_data=(te_x, te_y))
    else:
        model.fit(x=tr_x, y=tr_y, batch_size=20, epochs=args.epochs,
                  verbose=1, callbacks=[save_model], validation_split=0.05,
                  shuffle=True, class_weight=None, sample_weight=None,
                  initial_epoch=args.init_epoch, steps_per_epoch=None,
                  validation_steps=None)

    # ModelCheckpoint already saved per-epoch weights above; save the final
    # model explicitly as well.
    model.save(os.path.join(
        args.out_model_dir,
        "final_model_{}_{}epochs.h5".format(args.model_name, args.epochs)))
    print("Training finished, time taken: %s s" % (time.time() - t_train,))
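# NOTE: the attention Lambda in train() wraps outfunc, defined elsewhere in
# this repo. A plausible sketch (assumed, not taken from the source): collapse
# the frame-level classifier output (cla) to clip level, weighting each frame
# by the learned attention (att).
from keras import backend as K

def outfunc(vects):                     # hedged sketch
    cla, att = vects                    # both (N, n_time, num_classes)
    att = K.clip(att, 1e-7, 1.)         # keep the denominator away from zero
    # Attention-weighted average over time -> (N, num_classes)
    return K.sum(cla * att, axis=1) / K.sum(att, axis=1)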
def recognize(args, at_bool, sed_bool):
    t_rec = time.time()
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = te_x
    y = te_y
    na_list = te_na_list
    x = do_scale(x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []

    # Average predictions over every saved epoch checkpoint. Narrow the range
    # (e.g. range(20, args.epochs + 1)) to fuse only the later epochs.
    for epoch in range(1, args.epochs + 1):
        t1 = time.time()
        model_path = os.path.join(args.model_dir,
                                  "{0}_{1}.hdf5".format(args.model_name,
                                                        epoch))
        print("model_path", model_path)
        model = load_model(model_path)

        # Audio tagging
        if at_bool:
            t_pred = time.time()
            pred = model.predict(x)     # (N, num_classes)
            print("AT prediction time: %s" % (time.time() - t_pred,))
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities read from the
        # localization layer
        if sed_bool:
            t_pred = time.time()
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)
            print("SED prediction time: %s" % (time.time() - t_pred,))

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilities
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape: %s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished, time: %s s" % (time.time() - t_rec,))
def train(args):
    num_classes = cfg.num_classes

    # Load training & testing data
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model (a hedged sketch of block() follows this function)
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')  # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)                # (N, 240, 64, 1)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 32, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 16, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 8, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)     # (N, 240, 4, 128)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu",
                use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)     # (N, 240, 1, 256)

    a1 = Reshape((240, 256))(a1)                # (N, 240, 256)

    # Gated BGRU
    rnnout = Bidirectional(GRU(128, activation='linear',
                               return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid',
                                    return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'),
                          name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Save model callback
    filepath = os.path.join(
        args.out_model_dir,
        "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr"
        ".{epoch:02d}-{val_acc:.4f}.hdf5")
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath, monitor='val_acc',
                                 verbose=0, save_best_only=False,
                                 save_weights_only=False, mode='auto',
                                 period=1)

    # Data generator
    gen = RatioDataGenerator(batch_size=44, type='train')

    # Train
    model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                        steps_per_epoch=100,  # 100 iterations per 'epoch'
                        epochs=31,            # maximum 'epochs' to train
                        verbose=1,
                        callbacks=[save_model],
                        validation_data=(te_x, te_y))
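# NOTE: block() is defined elsewhere in this repo. Given the shape comments
# in train() (time/freq size preserved, 128 feature maps out), a gated
# convolutional block in the style of the CVSSP DCASE 2017 model would look
# roughly like this sketch; the details are assumptions.
from keras.layers import Activation, BatchNormalization, Conv2D, Lambda, Multiply

def block(input_tensor, n_filters=128):     # hedged sketch
    cnn = Conv2D(n_filters * 2, (3, 3), padding="same",
                 activation="linear", use_bias=False)(input_tensor)
    cnn = BatchNormalization(axis=-1)(cnn)
    # Gated linear unit: a linear half modulated by a sigmoid gate half.
    linear_half = Lambda(lambda t: t[..., :n_filters])(cnn)
    gate_half = Activation('sigmoid')(Lambda(lambda t: t[..., n_filters:])(cnn))
    return Multiply()([linear_half, gate_half])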