def detect(args):
    """Plot test-set predictions from the checkpoint with the best ``val_Acc``.

    The training history is read from the ``log`` module, the epoch index
    with the highest ``val_Acc`` is selected, the matching checkpoint in
    ``args.model_dir`` is loaded, and ``my_plot`` is called once per test
    clip with the averaged prediction, the reference labels and a ``.jpg``
    path under ``result/picture``.

    An existing ``result`` directory is deleted before plotting.

    Args:
        args: Object exposing ``te_hdf5_path``, ``scaler_path`` and
            ``model_dir``.

    Raises:
        ValueError: If a selected epoch does not match exactly one
            checkpoint file in ``args.model_dir``.
    """
    import log

    # Work on a copy: the selection loop below overwrites the chosen entries,
    # and the history stored in the imported module must stay intact.
    val_acc = list(log.history["val_Acc"])

    # Number of best-scoring epochs to fuse; each pick is zeroed so that the
    # next argmax returns the runner-up.
    num_models = 1
    choose = []
    for _ in range(num_models):
        best = np.argmax(val_acc)
        choose.append(best)
        val_acc[best] = 0
    # NOTE(review): np.argmax yields a 0-based list index, while the glob
    # below matches the epoch number embedded in the checkpoint file name.
    # Confirm whether the checkpoint callback numbers epochs from 0 or 1.

    te_x, te_y, na_list = load_hdf5(args.te_hdf5_path, verbose=1)
    x = do_scale(te_x, args.scaler_path, verbose=1)

    fusion_at_list = []
    for epoch in choose:
        t1 = time.time()
        pattern = os.path.join(args.model_dir, "*-%04d-*hdf5" % epoch)
        matches = glob.glob(pattern)
        if len(matches) != 1:
            raise ValueError(
                "expected exactly one checkpoint matching %r, found %d" %
                (pattern, len(matches)))
        model_path = matches[0]
        # The custom loss and metric must be supplied again so that the
        # saved model can be deserialised.
        model = load_model(model_path,
                           custom_objects={
                               'focal_loss_fixed': focal_loss(),
                               'Acc': myacc()
                           })
        print("load the model: %s" % model_path)
        # Audio tagging
        fusion_at_list.append(model.predict(x))

        print("Prediction time: %s" % (time.time() - t1, ))

    # Average the predictions of all selected checkpoints.
    fusion_at = np.mean(np.array(fusion_at_list), axis=0)
    print("AT shape: %s" % (fusion_at.shape, ))
    if os.path.exists("result"):
        shutil.rmtree("result")
    # NOTE(review): nothing here recreates "result/picture"; presumably
    # my_plot creates the target directory itself -- confirm.

    for audio_ind, clip_pred in enumerate(fusion_at):
        # Swap only the file extension; a plain str.replace("wav", "jpg")
        # would also rewrite "wav" occurring elsewhere in the clip name.
        stem, _ = os.path.splitext(na_list[audio_ind])
        picture_path = os.path.join("result", "picture", stem + ".jpg")
        my_plot(clip_pred,
                te_y[audio_ind, ...],
                picture_path,
                threshold=cfg.threshold)

    print("Prediction finished!")
# Example #2
# 0
def recognize(args, at_bool, sed_bool):
    """Predict on the test set with one fixed checkpoint and write CSV files.

    The test features are loaded and scaled, a checkpoint with a hard-coded
    file name is loaded from ``args.model_dir``, and the requested outputs
    are written to ``args.out_dir``.

    The checkpoint name does not depend on any loop variable, so the model
    is loaded and evaluated exactly once. Averaging several identical
    predictions would give the same values up to float rounding.

    Args:
        args: Object exposing ``te_hdf5_path``, ``scaler_path``,
            ``model_dir`` and ``out_dir``.
        at_bool: When true, write clip-level (audio tagging) probabilities
            to ``at_prob_mat.csv.gz``.
        sed_bool: When true, write the output of the layer named
            ``localization_layer`` to ``sed_prob_mat_list.csv.gz``.

    Raises:
        ValueError: If the checkpoint pattern does not match exactly one
            file.
    """
    te_x, _te_y, na_list = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = do_scale(te_x, args.scaler_path, verbose=1)

    model_p = "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr.06-0.8333.hdf5"
    pattern = os.path.join(args.model_dir, model_p)
    matches = glob.glob(pattern)
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one checkpoint matching %r, found %d" %
            (pattern, len(matches)))
    model_path = matches[0]

    t1 = time.time()
    model = load_model(model_path)

    # The fusion lists are kept (with a single entry) so that the arrays
    # handed to the writers are built exactly as for a multi-model ensemble.
    fusion_at_list = []
    fusion_sed_list = []

    # Audio tagging
    if at_bool:
        fusion_at_list.append(model.predict(x))

    # Sound event detection
    if sed_bool:
        in_layer = model.get_layer('in_layer')
        loc_layer = model.get_layer('localization_layer')
        # Backend function mapping the network input (plus the learning-phase
        # flag) to the output of the localization layer.
        func = K.function(
            [in_layer.input, K.learning_phase()], [loc_layer.output])
        fusion_sed_list.append(run_func(func, x, batch_size=20))

    print("Prediction time: %s" % (time.time() - t1, ))

    # Write out AT probabilities
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape, ))
        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilities
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape, ))
        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("Prediction finished!")
def train(args):
    """Build, train and checkpoint the CNN + bidirectional-GRU model.

    Any existing ``args.out_model_dir`` is deleted and recreated, the
    training and test sets are loaded and scaled, the network is trained
    from a batch generator, a checkpoint is written after every epoch, and
    the resulting history dictionary is written to ``src/log.py`` as a
    Python assignment (``history=...``).

    Args:
        args: Object exposing ``out_model_dir``, ``tr_hdf5_path``,
            ``te_hdf5_path`` and ``scaler_path``.
    """
    if os.path.exists(args.out_model_dir):
        shutil.rmtree(args.out_model_dir)
    create_folder(args.out_model_dir)
    num_classes = cfg.num_classes

    # Load training & testing data
    tr_x, tr_y, _tr_names = load_hdf5(args.tr_hdf5_path, verbose=1)
    te_x, te_y, _te_names = load_hdf5(args.te_hdf5_path, verbose=1)
    print("")

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model
    _, n_time, n_freq = tr_x.shape

    # Named `net_in` rather than `input` so the builtin is not shadowed.
    net_in = Input(shape=(n_time, n_freq), name='input_layer')
    feat = Reshape((n_time, n_freq, 1))(net_in)

    block1 = block_b(feat, 8)
    block1 = block_b(block1, 32)
    block1 = block_b(block1, 64)
    block1 = MaxPooling2D(pool_size=(1, 2))(block1)

    block2 = block_c(block1, 64)
    block2 = MaxPooling2D(pool_size=(1, 2))(block2)

    block3 = block_c(block2, 64)
    block3 = MaxPooling2D(pool_size=(1, 2))(block3)

    block4 = block_c(block3, 64)
    block4 = MaxPooling2D(pool_size=(1, 2))(block4)

    cnnout = Conv_BN(block4, 128, (1, 1), act="relu", bias=True)
    cnnout = MaxPooling2D(pool_size=(1, 2))(cnnout)
    # Flatten everything but the time axis. The time length comes from the
    # data instead of a hard-coded 240; the 256 features per frame still
    # assume the frequency layout the original code was written for.
    cnnout = Reshape((n_time, 256))(cnnout)

    rnn = Bidirectional(
        GRU(128,
            activation='relu',
            return_sequences=True,
            kernel_regularizer=regularizers.l2(0.01),
            recurrent_regularizer=regularizers.l2(0.01)))(cnnout)

    out = TimeDistributed(Dense(
        num_classes,
        activation='softmax',
        kernel_regularizer=regularizers.l2(0.0),
    ),
                          name='output_layer')(rnn)

    model = Model(net_in, out)
    model.summary()

    # Compile model. The optimizer is selected by name, i.e. with the
    # library's default settings; the explicitly configured Adam/SGD objects
    # the original built here were never passed to compile() and are gone.
    model.compile(loss=focal_loss(alpha=[1, 1, 1, 1], gamma=1),
                  optimizer="adam",
                  metrics=[myacc(threshold=0.5)])

    # Save model callback: one file per epoch, tagged with epoch number and
    # validation accuracy.
    filepath = os.path.join(
        args.out_model_dir,
        "aed-batchsize_50-lr_0.01-{epoch:04d}-{val_Acc:.4f}.hdf5")
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_Acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Data generator
    gen = Generator(batch_size=50, type='train')

    # Train
    # NOTE(review): class_weight is passed as the string "auto"; confirm
    # that the installed Keras version accepts (or ignores) this value.
    history = model.fit_generator(
        generator=gen.generate([tr_x], [tr_y]),
        steps_per_epoch=300,  # 300 generator batches count as one 'epoch'
        epochs=100,  # Maximum 'epoch' to train
        verbose=1,
        class_weight="auto",
        callbacks=[save_model],
        validation_data=(te_x, te_y))

    # Persist the history as an importable module (`import log`).
    with open('src/log.py', 'w') as f:
        f.write("history=" + str(history.history))
def train(args):
    """Build and train the gated CRNN with attention, saving every epoch.

    The training and test sets are loaded and scaled, the network is built
    and compiled with binary cross-entropy, and it is trained either from a
    ratio-balanced generator or with a plain ``fit`` call (the current
    default). A checkpoint named ``<model_name>_<epoch>.hdf5`` is written
    after each epoch, and the final model is additionally saved as
    ``final_model_<model_name>_<epochs>epochs.h5``.

    Args:
        args: Object exposing ``tr_hdf5_path``, ``te_hdf5_path``,
            ``scaler_path``, ``out_model_dir``, ``model_name``, ``epochs``,
            ``init_epoch``, and, for the generator path, ``batch_size`` and
            ``steps_p_epoch``.
    """
    num_classes = cfg.num_classes

    # Load training & testing data
    tr_x, tr_y, _tr_names = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    te_x, te_y, _te_names = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model
    _, n_time, n_freq = tr_x.shape
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)

    # Four stages of two `block` calls, each followed by pooling with size
    # (1, 2), i.e. along the last spatial axis only.
    for _ in range(4):
        a1 = block(a1)
        a1 = block(a1)
        a1 = MaxPooling2D(pool_size=(1, 2))(a1)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)

    # Drop the pooled axis. The time length comes from the data instead of
    # a hard-coded 240; the 256 features per frame still assume the 64-bin
    # input the original shape notes describe.
    a1 = Reshape((n_time, 256))(a1)

    # Gated BGRU: a linear recurrent branch multiplied by a sigmoid branch.
    rnnout = Bidirectional(GRU(128, activation='linear', return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid', return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention: per-frame class scores and per-frame weights, combined by
    # `outfunc` into one vector of length num_classes per clip.
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'), name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()
    adam_optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer=adam_optimizer,
                  metrics=['accuracy'])

    # Save model callback
    print("working here 1")
    # One checkpoint per epoch. The previous path embedded args.epochs, a
    # constant, so every epoch overwrote the same file, while recognize()
    # loads "<model_name>_<epoch>.hdf5" for every epoch in 1..args.epochs.
    # NOTE(review): assumes the installed Keras fills `{epoch}` with a
    # 1-based epoch number, so the last file is still
    # "<model_name>_<args.epochs>.hdf5" -- confirm for the pinned version.
    filepath = os.path.join(args.out_model_dir, args.model_name + "_{epoch}.hdf5")
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Train
    t_train = time.time()
    print("FINN Training started")
    use_generator = False
    if use_generator:
        # Ratio-balanced batches, validated on the separate test set.
        gen = RatioDataGenerator(batch_size=args.batch_size, type='train')
        model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                            steps_per_epoch=args.steps_p_epoch,
                            epochs=args.epochs,
                            verbose=1,
                            callbacks=[save_model],
                            validation_data=(te_x, te_y))
    else:
        # Plain fit; validation uses a 5% split of the training data, so
        # te_x / te_y are not used on this path.
        model.fit(x=tr_x,
                  y=tr_y,
                  batch_size=20,
                  epochs=args.epochs,
                  verbose=1,
                  callbacks=[save_model],
                  validation_split=0.05,
                  shuffle=True,
                  initial_epoch=args.init_epoch)
    model.save(os.path.join(args.out_model_dir, "final_model_{}_{}epochs.h5".format(args.model_name, args.epochs)))
    print("FINN Training finished, time taken: ", (time.time()-t_train))
def recognize(args, at_bool, sed_bool):
    """Average the predictions of all per-epoch checkpoints and write CSVs.

    For every epoch in ``1..args.epochs`` the checkpoint
    ``<model_name>_<epoch>.hdf5`` is loaded from ``args.model_dir`` and run
    on the scaled test set. The per-model outputs are averaged and written
    to ``args.out_dir``.

    Args:
        args: Object exposing ``epochs``, ``te_hdf5_path``, ``scaler_path``,
            ``model_dir``, ``model_name`` and ``out_dir``.
        at_bool: When true, write clip-level (audio tagging) probabilities
            to ``at_prob_mat.csv.gz``.
        sed_bool: When true, write the output of the layer named
            ``localization_layer`` to ``sed_prob_mat_list.csv.gz``.

    Raises:
        ValueError: If ``args.epochs`` is smaller than 1, because there
            would be no predictions to average.
    """
    t_rec = time.time()
    print("FINN Recognize started")

    # Fail early: with no checkpoints the averages below would be taken over
    # an empty list and NaN would be handed to the CSV writers.
    if args.epochs < 1:
        raise ValueError(
            "args.epochs must be >= 1 to average predictions, got %r" %
            (args.epochs,))

    te_x, _te_y, na_list = load_hdf5_data(args.te_hdf5_path, verbose=1)
    x = do_scale(te_x, args.scaler_path, verbose=1)

    fusion_at_list = []
    fusion_sed_list = []
    # NOTE(review): every iteration loads another full model into the same
    # process; if memory grows with many epochs, consider releasing the
    # backend session between iterations.
    for epoch in range(1, args.epochs + 1):
        t1 = time.time()
        model_path = os.path.join(args.model_dir, "{0}_{1}.hdf5".format(args.model_name, epoch))
        print("model_path", model_path)
        model = load_model(model_path)

        # Audio tagging
        if at_bool:
            t_pred = time.time()
            print("FINN at_pred started")
            pred = model.predict(x)
            print("FINN at_pred ended", (time.time()-t_pred))
            fusion_at_list.append(pred)

        # Sound event detection
        if sed_bool:
            t_pred = time.time()
            print("FINN pred_sed started")
            in_layer = model.get_layer('in_layer')
            loc_layer = model.get_layer('localization_layer')
            # Backend function mapping the network input (plus the
            # learning-phase flag) to the localization layer's output.
            func = K.function([in_layer.input, K.learning_phase()],
                              [loc_layer.output])
            pred3d = run_func(func, x, batch_size=20)
            fusion_sed_list.append(pred3d)
            print("FINN pred_sed ended", (time.time()-t_pred))
        print("Prediction time: %s" % (time.time() - t1,))

    # Write out AT probabilities
    if at_bool:
        fusion_at = np.mean(np.array(fusion_at_list), axis=0)
        print("AT shape: %s" % (fusion_at.shape,))

        io_task4.at_write_prob_mat_to_csv(
            na_list=na_list,
            prob_mat=fusion_at,
            out_path=os.path.join(args.out_dir, "at_prob_mat.csv.gz"))

    # Write out SED probabilities
    if sed_bool:
        fusion_sed = np.mean(np.array(fusion_sed_list), axis=0)
        print("SED shape:%s" % (fusion_sed.shape,))

        io_task4.sed_write_prob_mat_list_to_csv(
            na_list=na_list,
            prob_mat_list=fusion_sed,
            out_path=os.path.join(args.out_dir, "sed_prob_mat_list.csv.gz"))

    print("FINN Prediction finished!, time: ", (time.time()-t_rec))
def train(args):
    """Build and train the gated CRNN with attention from balanced batches.

    The training and test sets are loaded and scaled, the network is built
    and compiled with binary cross-entropy and the ``adam`` optimizer, and
    it is trained from a ``RatioDataGenerator``. A checkpoint tagged with
    the epoch number and validation accuracy is written to
    ``args.out_model_dir`` after every epoch.

    Args:
        args: Object exposing ``tr_hdf5_path``, ``te_hdf5_path``,
            ``scaler_path`` and ``out_model_dir``.
    """
    num_classes = cfg.num_classes

    # Load training & testing data
    tr_x, tr_y, _tr_names = load_hdf5_data(args.tr_hdf5_path, verbose=1)
    te_x, te_y, _te_names = load_hdf5_data(args.te_hdf5_path, verbose=1)
    print("tr_x.shape: %s" % (tr_x.shape,))

    # Scale data
    tr_x = do_scale(tr_x, args.scaler_path, verbose=1)
    te_x = do_scale(te_x, args.scaler_path, verbose=1)

    # Build model
    _, n_time, n_freq = tr_x.shape
    input_logmel = Input(shape=(n_time, n_freq), name='in_layer')
    a1 = Reshape((n_time, n_freq, 1))(input_logmel)

    # Four stages of two `block` calls, each followed by pooling with size
    # (1, 2), i.e. along the last spatial axis only.
    for _ in range(4):
        a1 = block(a1)
        a1 = block(a1)
        a1 = MaxPooling2D(pool_size=(1, 2))(a1)

    a1 = Conv2D(256, (3, 3), padding="same", activation="relu", use_bias=True)(a1)
    a1 = MaxPooling2D(pool_size=(1, 4))(a1)

    # Drop the pooled axis. The time length comes from the data instead of
    # a hard-coded 240; the 256 features per frame still assume the 64-bin
    # input the original shape notes describe.
    a1 = Reshape((n_time, 256))(a1)

    # Gated BGRU: a linear recurrent branch multiplied by a sigmoid branch.
    rnnout = Bidirectional(GRU(128, activation='linear', return_sequences=True))(a1)
    rnnout_gate = Bidirectional(GRU(128, activation='sigmoid', return_sequences=True))(a1)
    a2 = Multiply()([rnnout, rnnout_gate])

    # Attention: per-frame class scores and per-frame weights, combined by
    # `outfunc` into one vector of length num_classes per clip.
    cla = TimeDistributed(Dense(num_classes, activation='sigmoid'), name='localization_layer')(a2)
    att = TimeDistributed(Dense(num_classes, activation='softmax'))(a2)
    out = Lambda(outfunc, output_shape=(num_classes,))([cla, att])

    model = Model(input_logmel, out)
    model.summary()

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Save model callback: one file per epoch, tagged with epoch number and
    # validation accuracy.
    filepath = os.path.join(args.out_model_dir, "gatedAct_rationBal44_lr0.001_normalization_at_cnnRNN_64newMel_240fr.{epoch:02d}-{val_acc:.4f}.hdf5")
    create_folder(os.path.dirname(filepath))
    save_model = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Data generator
    gen = RatioDataGenerator(batch_size=44, type='train')

    # Train
    model.fit_generator(generator=gen.generate([tr_x], [tr_y]),
                        steps_per_epoch=100,    # 100 iters is called an 'epoch'
                        epochs=31,              # Maximum 'epoch' to train
                        verbose=1,
                        callbacks=[save_model],
                        validation_data=(te_x, te_y))