# Example #1
# 0
def recognize(args, at_bool, sed_bool):
    """Run inference with a fixed checkpoint and fuse predictions over epochs.

    Loads the test split, scales it, and for each epoch in [20, 30) loads a
    model checkpoint, optionally computes clip-level audio-tagging (AT)
    probabilities and/or frame-level sound-event-detection (SED)
    probabilities, averages them across iterations, and writes the fused
    results to CSV files under args.out_dir.

    Args:
      args: namespace with te_hdf5_path, scaler_path, model_dir, out_dir.
      at_bool: if True, compute and write fused audio-tagging probabilities.
      sed_bool: if True, compute and write fused frame-level SED probabilities.
    """
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = do_scale(te_x, args.scaler_path, verbose=1)
    na_list = te_na_list

    fusion_at_list = []
    fusion_sed_list = []
    for epoch in range(20, 30, 1):
        t1 = time.time()
        print(epoch)

        # NOTE(review): a single hard-coded checkpoint is loaded on every
        # iteration, so all 10 passes fuse the same model (the per-epoch
        # glob pattern was commented out upstream) — confirm intentional.
        model_p = "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr.06-0.8333.hdf5"
        [model_path] = glob.glob(os.path.join(args.model_dir, model_p))
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            fusion_at_list.append(model.predict(x))

        # Sound event detection: frame-level probabilities taken from the
        # intermediate 'localization_layer' via a backend function.
        if sed_bool:
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function(
                [in_layer.input, K.learning_phase()], [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)

        print("Prediction time: %s" % (time.time() - t1, ))

    # Write out AT probabilities (mean over all loaded checkpoints).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape, ))
        io_task4.at_write_prob_mat_to_csv(na_list=na_list,
                                          prob_mat=fusion_at,
                                          out_path=os.path.join(
                                              args.out_dir,
                                              "at_prob_mat.csv.gz"))

    # Write out SED probabilities (mean over all loaded checkpoints).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape, ))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
def recognize(args, at_bool, sed_bool):
    """Fuse AT/SED predictions from the epoch-25..29 checkpoints.

    Loads (unscaled) features from args.hdf5_path, runs each matching
    checkpoint, averages the per-epoch probabilities, and writes the fused
    results to CSV.

    NOTE(review): this is one of several recognize() definitions in this
    module; only the last definition is visible at import time.

    Args:
      args: namespace with hdf5_path, model_dir, out_dir (scaler_path only
        needed if the commented-out scaling is re-enabled).
      at_bool: if True, compute and write fused audio-tagging probabilities.
      sed_bool: if True, compute and write fused frame-level SED probabilities.
    """
    (te_x, _, te_na_list) = load_hdf5_data(args.hdf5_path, verbose=1)
    x = te_x
    na_list = te_na_list

    # Scaling deliberately disabled for this variant:
    # x[:, :64] = do_scale(x[:, :64], args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    for epoch in range(25, 30, 1):
        t1 = time.time()

        file_name = os.path.join(args.model_dir, "*.%02d-0.*.hdf5" % epoch)
        # The pattern can match several checkpoints for one epoch; glob's
        # order is filesystem-dependent, so sort to make the pick
        # deterministic across runs/machines.
        model_path = sorted(glob.glob(file_name))[0]
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            pred = model.predict(x, batch_size=5)
            fusion_at_list.append(pred)

        # Sound event detection: frame-level probabilities from the
        # intermediate 'localization_layer'.
        if sed_bool:
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)

        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (mean over checkpoints).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))
        # NOTE(review): out_path here is the bare directory, unlike the SED
        # write below which joins a file name — a leftover hint suggested
        # "at_audio_prob_mat.csv.gz"; confirm the intended target.
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=args.out_dir)

    # Write out SED probabilities (mean over checkpoints).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
def train(args):
    """Build and train the gated-CNN + BGRU + attention audio tagger.

    Input is (N, 240, 64) log-mel spectrograms. Four conv stages each halve
    the frequency axis only, a Conv2D(256) plus a final pool collapse it,
    then a gated bidirectional GRU feeds an attention-weighted pooling over
    time that produces clip-level class probabilities.

    A checkpoint "<model_name>_<epoch>.hdf5" is written after every epoch
    (matching what recognize() later loads per epoch), and the final model
    is saved once more after training.

    Args:
      args: namespace with tr_hdf5_path, te_hdf5_path, scaler_path,
        out_model_dir, model_name, epochs, init_epoch, and — for the
        generator path — batch_size and steps_p_epoch.
    """
    num_classes = cfg.num_classes

    # Load training & testing data.
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale features with the precomputed scaler.
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model.
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')   # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)  # (N, 240, 64, 1)

    # Four conv stages; each pooling halves the frequency axis only.
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 32, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 16, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 8, 128)

    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1)  # (N, 240, 4, 128)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)  # (N, 240, 1, 256)

    a1 = Reshape((240, 256))(a1)  # (N, 240, 256)

    # Gated BGRU: a linear GRU modulated elementwise by a sigmoid GRU gate.
    rnnout = Bidirectional(GRU(128, activation='linear', return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid', return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention: per-frame class probabilities (cla, also exposed as
    # 'localization_layer' for SED) pooled over time with learned softmax
    # weights (att).
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'), name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    # Compile model.
    adam_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                           epsilon=None, decay=0.0, amsgrad=False)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam_optimizer,
                  metrics=['accuracy'])

    # Save a checkpoint after every epoch. The filepath includes an
    # {epoch} placeholder so each epoch gets its own file — the original
    # fixed name was overwritten every epoch, while recognize() expects
    # per-epoch files "<model_name>_<epoch>.hdf5" (Keras 2 formats {epoch}
    # 1-based, matching recognize()'s range(1, epochs+1)).
    filepath = os.path.join(args.out_model_dir,
                            "{0}_{{epoch}}.hdf5".format(args.model_name))
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Train. Flip use_generator to True to use the class-ratio-balanced
    # batch generator instead of plain model.fit.
    t_train = time.time()
    print("FINN Training started")
    use_generator = False
    if use_generator:
        gen = RatioDataGenerator(batch_size=args.batch_size, type='train')
        model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                            steps_per_epoch=args.steps_p_epoch,
                            epochs=args.epochs,
                            verbose=1,
                            callbacks=[save_model],
                            validation_data=(te_x, te_y))
    else:
        model.fit(x=tr_x, y=tr_y,
                  batch_size=20,
                  epochs=args.epochs,
                  verbose=1,
                  callbacks=[save_model],
                  validation_split=0.05,
                  shuffle=True,
                  class_weight=None,
                  sample_weight=None,
                  initial_epoch=args.init_epoch,
                  steps_per_epoch=None,
                  validation_steps=None)

    # Save the final state explicitly as well (ModelCheckpoint already
    # saved each epoch, but this guarantees a copy of the last one).
    model.save(os.path.join(args.out_model_dir,
                            "final_model_{}_{}epochs.h5".format(args.model_name, args.epochs)))
    print("FINN Training finished, time taken: ", (time.time() - t_train))
def recognize(args, at_bool, sed_bool):
    """Average AT/SED predictions over every epoch checkpoint, write CSVs.

    Expects one checkpoint per epoch at
    "<args.model_dir>/<args.model_name>_<epoch>.hdf5" for epoch in
    1..args.epochs; the per-epoch probabilities are mean-fused.

    Args:
      args: namespace with te_hdf5_path, scaler_path, model_dir,
        model_name, epochs, out_dir.
      at_bool: if True, write fused audio-tagging probabilities.
      sed_bool: if True, write fused frame-level SED probabilities.
    """
    t_rec = time.time()
    print("FINN Recognize started")

    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    y = te_y
    na_list = te_na_list
    x = do_scale(te_x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    # Fuse over every trained epoch's checkpoint.
    for epoch in range(1, args.epochs + 1):
        t1 = time.time()
        model_path = os.path.join(
            args.model_dir, "{0}_{1}.hdf5".format(args.model_name, epoch))
        print("model_path", model_path)
        model = load_model(model_path)

        # Audio tagging: clip-level class probabilities.
        if at_bool:
            t_pred = time.time()
            print("FINN at_pred started")
            fusion_at_list.append(model.predict(x))
            print("FINN at_pred ended", (time.time() - t_pred))

        # Sound event detection: frame-level probabilities read from the
        # intermediate 'localization_layer' via a backend function.
        if sed_bool:
            t_pred = time.time()
            print("FINN pred_sed started")
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            fusion_sed_list.append(run_func(func, x, batch_size=20))
            print("FINN pred_sed ended", (time.time() - t_pred))
        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities (mean over epochs).
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))

        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilities (mean over epochs).
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))

        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("FINN Prediction finished!, time: ", (time.time() - t_rec))
def train(args):
    """Build and train the gated-CNN + BGRU + attention audio tagger.

    Input is (N, 240, 64) log-mel spectrograms. Four conv stages each halve
    the frequency axis only, a Conv2D(256) plus a final pool collapse it,
    then a gated bidirectional GRU feeds an attention-weighted pooling over
    time that produces clip-level class probabilities.

    NOTE(review): this redefines the train() declared earlier in the
    module; at import time only this last definition is visible.

    Args:
      args: namespace with tr_hdf5_path, te_hdf5_path, scaler_path,
        out_model_dir.
    """
    num_classes = cfg.num_classes
    
    # Load training & testing data
    (tr_x, tr_y, tr_na_list) = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    (te_x, te_y, te_na_list) = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data with the precomputed scaler
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)
    
    # Build model
    (_, n_time, n_freq) = tr_x.shape    # (N, 240, 64)
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')   # (N, 240, 64)
    a1 = Reshape((n_time, n_freq, 1))(input_logmel) # (N, 240, 64, 1)
    
    # Four conv stages; each pooling halves the frequency axis only.
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1) # (N, 240, 32, 128)
    
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1) # (N, 240, 16, 128)
    
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1) # (N, 240, 8, 128)
    
    a1 = block(a1)
    a1 = block(a1)
    a1 = MaxPooling2D(pool_size=(1, 2))(a1) # (N, 240, 4, 128)
    
    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1) # (N, 240, 1, 256)
    
    a1 = Reshape((240, 256))(a1) # (N, 240, 256)
    
    # Gated BGRU: a linear GRU modulated elementwise by a sigmoid GRU gate
    rnnout = Bidirectional(GRU(128, activation='linear', return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid', return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])
    
    # Attention: per-frame class probabilities (cla, also exposed as
    # 'localization_layer' for SED) pooled over time with learned softmax
    # weights (att)
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'), name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])
    
    model = Model(input_logmel, out)
    model.summary()
    
    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    
    # Save model callback: one checkpoint per epoch, named with epoch
    # number and validation accuracy (matches the glob pattern used by the
    # epoch-range recognize() variant).
    filepath = os.path.join(args.out_model_dir, "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr.{epoch:02d}-{val_acc:.4f}.hdf5")
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc', 
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)  

    # Data generator: class-ratio-balanced batches of 44
    gen = RatioDataGenerator(batch_size=44, type='train')

    # Train
    model.fit_generator(generator=gen.generate([tr_x], [tr_y]), 
                        steps_per_epoch=100,    # 100 iters is called an 'epoch'
                        epochs=31,              # Maximum 'epoch' to train
                        verbose=1, 
                        callbacks=[save_model], 
                        validation_data=(te_x, te_y))