def recognize(args, at_bool, sed_bool):
    """Average predictions over the epoch-20..29 checkpoints and write CSVs.

    Loads the test set from ``args.te_hdf5_path``, scales it with the fitted
    scaler, then for each epoch in [20, 30) loads the matching checkpoint and
    optionally predicts audio-tagging (AT) clip probabilities and/or
    frame-level sound-event-detection (SED) probabilities.  The per-epoch
    predictions are mean-fused and written to gzip CSVs under ``args.out_dir``.

    Args:
        args: namespace with te_hdf5_path, scaler_path, model_dir, out_dir.
        at_bool: if True, produce fused audio-tagging probabilities.
        sed_bool: if True, produce fused frame-level SED probabilities.
    """
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = te_x
    y = te_y
    na_list = te_na_list

    x = do_scale(x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    for epoch in range(20, 30, 1):
        t1 = time.time()
        print(epoch)
        # Fix: pick the checkpoint that matches the current epoch.  The
        # previous code loaded one hard-coded .hdf5 file on every iteration,
        # so the "fusion" below averaged ten identical prediction matrices.
        [model_path] = glob.glob(os.path.join(args.model_dir,
                                              "*.%02d-0.*.hdf5" % epoch))
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            pred = model.predict(x)
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities taken from the
        # intermediate 'localization_layer' via a Keras backend function.
        if sed_bool:
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (mean over epochs).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilites (mean over epochs).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
def recognize(args, at_bool, sed_bool):
    """Average predictions over the epoch-25..29 checkpoints and write CSVs.

    Variant of recognize() that reads ``args.hdf5_path`` (labels ignored),
    skips feature scaling, fuses predictions from the epoch-25..29
    checkpoints, and writes the results.

    Args:
        args: namespace with hdf5_path, scaler_path, model_dir, out_dir.
        at_bool: if True, produce fused audio-tagging probabilities.
        sed_bool: if True, produce fused frame-level SED probabilities.
    """
    (te_x, _, te_na_list) = load_hdf5_data(args.hdf5_path, verbose=1)
    x = te_x
    na_list = te_na_list

    # Scaling is intentionally disabled in this variant.
    # NOTE(review): assumes the features in args.hdf5_path are already
    # normalized — confirm against the feature-extraction step.
    # x[:, :64] = do_scale(x[:, :64], args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    for epoch in range(25, 30, 1):
        t1 = time.time()
        file_name = os.path.join(args.model_dir, "*.%02d-0.*.hdf5" % epoch)
        # Fix: glob.glob() returns matches in arbitrary, platform-dependent
        # order, so indexing [0] on the raw list could pick a different
        # checkpoint on each run when an epoch has several matches.  Sort
        # first so the choice is deterministic.
        model_path = sorted(glob.glob(file_name))[0]
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            pred = model.predict(x, batch_size=5)
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities from the
        # intermediate 'localization_layer'.
        if sed_bool:
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (mean over epochs).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        # NOTE(review): out_path here is the bare directory, unlike the other
        # variants which join a filename — verify io_task4 handles this.
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=args.out_dir)  # "at_audio_prob_mat.csv.gz"

    # Write out SED probabilites (mean over epochs).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
def train(args):
    """Build the gated CRNN model and train it, checkpointing every epoch.

    Loads and scales the training/testing data, builds a CNN + gated-BGRU +
    attention model, and trains with either model.fit (default) or a ratio-
    balanced generator.  One checkpoint per epoch is written so recognize()
    can later fuse predictions across epochs.

    Args:
        args: namespace with tr_hdf5_path, te_hdf5_path, scaler_path,
            out_model_dir, model_name, epochs, init_epoch, batch_size,
            steps_p_epoch.
    """
    num_classes = cfg.num_classes

    # Load training & testing data.
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data with the pre-fitted scaler.
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model.
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')   # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)  # (N, 240, 64, 1)

    # Four conv groups; each pooling halves the frequency axis only,
    # preserving the time resolution needed for frame-level SED.
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 32, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 16, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 8, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 4, 128)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)  # (N, 240, 1, 256)

    # Fix: use n_time instead of the hard-coded 240 so the model also builds
    # for inputs with a different number of frames.  Identical for 240-frame
    # data.
    a1 = Reshape((n_time, 256))(a1)  # (N, 240, 256)

    # Gated BGRU: a linear GRU gated element-wise by a sigmoid GRU.
    rnnout = Bidirectional(GRU(128, activation='linear',
                               return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid',
                                    return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention: per-frame classification weighted by a softmax attention
    # head, pooled over time by outfunc.
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'),
                          name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    adam_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                           epsilon=None, decay=0.0,
                                           amsgrad=False)

    # Compile model.  binary_crossentropy: multi-label targets (one sigmoid
    # per class).
    model.compile(loss='binary_crossentropy',
                  optimizer=adam_optimizer,
                  metrics=['accuracy'])

    # Save-model callback.
    # Fix: use the Keras "{epoch}" placeholder so every epoch gets its own
    # file "<model_name>_<epoch>.hdf5" (1-based, matching what recognize()
    # loads for epoch in 1..args.epochs).  The old fixed name
    # "<model_name>_<args.epochs>.hdf5" was overwritten on every epoch and
    # the per-epoch files were never produced.
    filepath = os.path.join(args.out_model_dir,
                            "{0}_{{epoch}}.hdf5".format(args.model_name))
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Train.
    t_train = time.time()
    print("FINN Training started")
    use_generator = False
    if use_generator:
        # Ratio-balanced batches; batch size comes from the CLI.
        gen = RatioDataGenerator(batch_size=args.batch_size, type='train')
        model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                            steps_per_epoch=args.steps_p_epoch,
                            epochs=args.epochs,
                            verbose=1,
                            callbacks=[save_model],
                            validation_data=(te_x, te_y))
    else:
        model.fit(x=tr_x,
                  y=tr_y,
                  batch_size=20,
                  epochs=args.epochs,
                  verbose=1,
                  callbacks=[save_model],
                  validation_split=0.05,
                  shuffle=True,
                  class_weight=None,
                  sample_weight=None,
                  initial_epoch=args.init_epoch,
                  steps_per_epoch=None,
                  validation_steps=None)

    # Explicitly save the final model in addition to the per-epoch
    # checkpoints.
    model.save(os.path.join(
        args.out_model_dir,
        "final_model_{}_{}epochs.h5".format(args.model_name, args.epochs)))
    print("FINN Training finished, time taken: ", (time.time() - t_train))
def recognize(args, at_bool, sed_bool):
    """Fuse predictions over all per-epoch checkpoints and write CSVs.

    Loads the test set from ``args.te_hdf5_path``, scales it, then for each
    epoch in 1..args.epochs loads "<model_name>_<epoch>.hdf5", optionally
    predicts audio-tagging (AT) and/or frame-level SED probabilities, and
    writes the mean-fused results to gzip CSVs under ``args.out_dir``.

    Args:
        args: namespace with te_hdf5_path, scaler_path, model_dir,
            model_name, epochs, out_dir.
        at_bool: if True, produce fused audio-tagging probabilities.
        sed_bool: if True, produce fused frame-level SED probabilities.
    """
    t_rec = time.time()
    print("FINN Recognize started")
    # Labels (te_y) are not needed for inference; only features and names
    # are used.
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = te_x
    na_list = te_na_list

    x = do_scale(x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    # Average over every per-epoch checkpoint written during training.
    for epoch in range(1, args.epochs + 1):
        t1 = time.time()
        model_path = os.path.join(
            args.model_dir, "{0}_{1}.hdf5".format(args.model_name, epoch))
        print("model_path", model_path)
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            t_pred = time.time()
            print("FINN at_pred started")
            pred = model.predict(x)
            print("FINN at_pred ended", (time.time() - t_pred))
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities from the
        # intermediate 'localization_layer'.
        if sed_bool:
            t_pred = time.time()
            print("FINN pred_sed started")
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)
            print("FINN pred_sed ended", (time.time() - t_pred))

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (mean over epochs).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilites (mean over epochs).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("FINN Prediction finished!, time: ", (time.time() - t_rec))
def train(args):
    """Build the gated CRNN model and train it with the ratio generator.

    Original fixed-hyperparameter training routine: loads and scales the
    data, builds the CNN + gated-BGRU + attention model, compiles with the
    default Adam optimizer, and trains for 31 "epochs" of 100 generator
    steps, checkpointing every epoch under a name that encodes epoch and
    val_acc.

    Args:
        args: namespace with tr_hdf5_path, te_hdf5_path, scaler_path,
            out_model_dir.
    """
    num_classes = cfg.num_classes

    # Load training & testing data.
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data with the pre-fitted scaler.
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model.
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')   # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)  # (N, 240, 64, 1)

    # Four conv groups; pooling halves the frequency axis only, keeping
    # full time resolution for frame-level SED.
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 32, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 16, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 8, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 4, 128)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)  # (N, 240, 1, 256)

    # Fix: use n_time instead of the hard-coded 240 so the model also builds
    # for inputs with a different frame count.  Identical for 240-frame data.
    a1 = Reshape((n_time, 256))(a1)  # (N, 240, 256)

    # Gated BGRU: a linear GRU gated element-wise by a sigmoid GRU.
    rnnout = Bidirectional(GRU(128, activation='linear',
                               return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid',
                                    return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention: per-frame classification weighted by a softmax attention
    # head, pooled over time by outfunc.
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'),
                          name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    # Compile model.  binary_crossentropy: multi-label targets.
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Save-model callback: one checkpoint per epoch; the filename embeds the
    # 1-based epoch and val_acc, which recognize() later globs for.
    filepath = os.path.join(
        args.out_model_dir,
        "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr"
        ".{epoch:02d}-{val_acc:.4f}.hdf5")
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Data generator: ratio-balanced batches of 44.
    gen = RatioDataGenerator(batch_size=44, type='train')

    # Train.
    model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                        steps_per_epoch=100,  # 100 iters is called an 'epoch'
                        epochs=31,            # Maximum 'epoch' to train
                        verbose=1,
                        callbacks=[save_model],
                        validation_data=(te_x, te_y))