def spectrograms(input_type, data_list, labelFile, savePath, fft_size, win_size, hop_size,
                 duration, data_window=100, window_shift=10, augment=True, save=True,
                 minimum_length=1):
    from audio import compute_spectrogram

    spectrograms = list()
    labels = list()

    print('Computing the ' + input_type + ' spectrograms !!')
    with open(data_list, 'r') as f:
        spectrograms = [
            compute_spectrogram(input_type, file.strip(), fft_size, win_size,
                                hop_size, duration, augment, minimum_length)
            for file in f
        ]

    # Get the labels into a list and save it along with the spectrograms
    with open(labelFile, 'r') as f:
        #labels = [1 if line.strip().split(' ')[1] == 'genuine' else 0 for line in f]
        labels = [line.strip() for line in f]

    if augment:
        new_data = list()
        new_labels = list()
        assert (len(labels) == len(spectrograms))

        print('Now performing augmentation using sliding window mechanism on original spectrogram .... ')
        for i in range(len(spectrograms)):
            d, l = augment_data(spectrograms[i], labels[i], data_window,
                                input_type, window_shift)
            new_data.extend(d)  # extend the list rather than adding it into a new list
            new_labels.extend(l)

        spectrograms = new_data
        labels = new_labels

    if save:
        from helper import makeDirectory
        makeDirectory(savePath)
        outfile = savePath + '/spec'
        # np.savez takes the output path directly (it appends .npz itself),
        # so no explicit open() is needed here.
        np.savez(outfile, spectrograms=spectrograms, labels=labels)

    print('Finished computing spectrogram and saved inside: ', savePath)
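# Example of how spectrograms() might be invoked -- a sketch only. The list/label/output
# paths and the STFT settings below are placeholders, not values from the actual experiments.
#
#   spectrograms(input_type='mag_spec',
#                data_list='lists/train_wav.list',       # one audio file path per line (assumed layout)
#                labelFile='lists/train_labels.txt',     # one label per line, aligned with data_list
#                savePath='spectrograms/mag_spec/256FFT/1sec/train',
#                fft_size=256, win_size=256, hop_size=128,
#                duration=1,                             # seconds of audio used per file
#                data_window=100, window_shift=10,       # sliding-window augmentation (frames)
#                augment=True, save=True)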
def run_prediction(model_path, featType, dataType, protocal, inputPath, mean_std_file, outBase,
                   batch_size=100, activation='elu', init_type='xavier', targets=2, fftSize=256,
                   architecture=2, duration=1, padding=True, n_model=None, inputType='mag_spec',
                   augment=True):

    # Extract features for the requested data set
    #print('Extracting ' + featType + ' for the ' + dataType + ' set')
    print('outBase in run_prediction is: ', outBase)

    data, lab = dataset.load_data(inputPath + dataType + '/')
    #data = dataset.normalise_data(data,mean_std_file,'utterance')
    data = dataset.normalise_data(data, mean_std_file, 'global_mv')
    labels = dataset.get_labels_according_to_targets(lab, targets)

    featureList = getFeatures(featType, inputType, data, labels, batch_size, model_path,
                              n_model, activation, init_type, targets, fftSize, padding,
                              architecture, duration, augment)

    if featType == 'bottleneck':
        makeDirectory(outBase + '/features/')
        saveFeatures(featureList, outBase + '/features/' + dataType)  # saves as train.npz, dev.npz etc.
    elif featType == 'scores':
        makeDirectory(outBase + '/predictions/')
        write_scores_to_file(featureList,
                             outfile=outBase + '/predictions/' + str(dataType) + '_prediction.txt')
    else:
        print('PLEASE CHOOSE A CORRECT PARAM !!')
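# Example of how run_prediction() might be called -- a sketch only. The model and feature
# paths are placeholders; 'scores' writes <dataType>_prediction.txt under outBase/predictions/,
# while 'bottleneck' saves bottleneck features under outBase/features/.
#
#   run_prediction(model_path='models/model1_best',
#                  featType='scores',                   # or 'bottleneck'
#                  dataType='eval',
#                  protocal='protocol/eval_protocol.txt',
#                  inputPath='spectrograms/mag_spec/256FFT/1sec/',
#                  mean_std_file='spectrograms/mag_spec/256FFT/1sec/train/mean_std.npz',
#                  outBase='results/model1')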
def trainCNN_on_Bulbul_architecture():
    # CNN training parameters
    activation = 'elu'
    init_type = 'xavier'
    batch_size = 32
    epochs = 200  #0

    # Regularizer parameters
    use_lr_decay = False        # set this flag for LR decay
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1
    momentum = 0.95

    dropout1 = 0.6              # for input to first FC layer
    dropout3 = 0.5              # for intermediate layer input
    dropout2 = [0.5, 0.6]
    lambdas = [0.0005, 0.001]

    architectures = [2]         # birds architecture 1, to make it unified (check model.py for definition)
    trainingSize = [1]          # in seconds
    #lr = 0.0005
    learning_rates = [0.0005, 0.0003, 0.0001, 0.005]
    targets = 2
    fftSize = 256
    specType = 'mag_spec'
    padding = True

    # Using the following paths since the scripts were moved into git and symlinked so the code stays synchronised
    spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms/'
    tensorboardPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/tensorflow_log_dir/'
    modelPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/models/'

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = spectrogramPath + specType + '/' + str(fftSize) + 'FFT/' + str(duration) + 'sec/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(tD, mean_std_file)

        print('Shape of labels: ', tL.shape)
        #tD = dataset.normalise_data(tD,mean_std_file,'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')  # global

        # Load dev data, labels and perform norm
        devD, devL = dataset.load_data(outPath + 'dev/')
        devL = dataset.get_labels_according_to_targets(devL, targets)
        #devD = dataset.normalise_data(devD,mean_std_file,'utterance')
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        for dropout in dropout2:
            architecture = architectures[0]
            for lr in learning_rates:
                hyp_str = '_cnnModel' + str(architecture) + '_keepProb_0.6_' + str(dropout) + str(dropout3) + 'lr' + str(lr)

                log_dir = tensorboardPath + '/birdsArch_max2000epochsTEMP/' + hyp_str
                model_save_path = modelPath + '/birdsArch_max2000epochsTEMP/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                print('Training model with ' + str(duration) + ' sec data and cnnModel' + str(architecture))
                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, duration, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1,
                    b2, momentum, optimizer_type, dropout1, dropout, dropout3,
                    model_save_path, log_dir, logfile, wDecayFlag, lossPenalty,
                    applyBatchNorm, init_type, epochs, batch_size, targets)

                #plot_2dGraph('#Epochs', 'Avg CE Loss', tLoss,vLoss,'train_ce','val_ce', figDirectory+'/loss.png')
                #plot_2dGraph('#Epochs', 'Avg accuracy', tAcc,vAcc,'train_acc','val_acc',figDirectory+'/acc.png')
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc', figDirectory + '/v_ls_acc.png')
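# The training loops above rely on a makeDirectory() helper imported from helper.py, whose
# implementation is not part of this snippet. The function below is only a minimal stand-in
# (hypothetical name, not the repo's actual helper) that matches how makeDirectory() is used.

import os

def make_directory_sketch(path):
    # Create the directory, including any missing parents, if it does not already exist.
    if not os.path.exists(path):
        os.makedirs(path)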
def trainCNN_on_trainData():
    # CNN training parameters
    activation = 'mfm'          # choose activation: mfm, elu, relu, mfsoftmax, tanh ?
    init_type = 'xavier'        # 'xavier' or 'truncated_normal'
    batch_size = 32
    epochs = 120                # 1000

    # Regularizer parameters
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1
    momentum = 0.95

    dropout1 = 0.1              # for input to first FC layer
    dropout2 = 0.2              # for intermediate layer input
    drops = [0.4]               # dropout applied to the inputs of the FC layers
    lambdas = [0.0005, 0.001]

    architectures = [1]
    trainingSize = [1]          # in seconds
    use_lr_decay = True         # set this flag for LR decay
    learning_rates = [0.0008, 0.0006, 0.0004, 0.0001]
    #learning_rates=[0.0001, 0.001]  #0.0001
    #learning_rates=np.random.uniform(0.003,0.0005,10)
    #learning_rates=np.random.uniform(0.01,0.0001,10)
    #learning_rates=np.linspace(0.001,0.01,10)
    #learning_rates=[0.0002, 0.0003, 0.0004, 0.0005, 0.0006,0.0007,0.0008, 0.0009]
    targets = 2

    #specType='mag_spec'   # lets try loading mag_spec
    #inputTypes=['mel_spec']  #'mag_spec' #'cqt_spec'  ## Running on Hepworth !
    inputTypes = ['mag_spec']   # Not run yet
    #inputTypes=['cqt_spec']    # Not run yet
    #inputTypes=['mag_spec','cqt_spec','mel_spec']

    padding = True
    augment = True
    trainPercentage = 1.0       # fraction of the original data each training epoch sees at random (1.0 = all of it)
    valPercentage = 0.8

    if augment:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms_augmented/1sec_shift/'
    else:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms/'

    # Using the following paths since the scripts were moved into git and symlinked so the code stays synchronised
    tensorboardPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/tensorflow_log_dir/'
    modelPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/models_augmented/'

    #for duration in trainingSize:
    duration = 1
    fftSize = 512

    for specType in inputTypes:
        #print('Now loading the data with FFT size: ', fftSize)
        outPath = spectrogramPath + specType + '/' + str(fftSize) + 'FFT/' + str(duration) + 'sec/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)
        assert (len(tD) == len(tL))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(tD, mean_std_file)

        # We will try utterance based norm later
        #tD = dataset.normalise_data(tD,mean_std_file,'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')  # global

        # Take only a fraction (valPercentage) of the new augmented data for validation,
        # just to save some time during training
        devD, devL = dataset.get_random_data(outPath + 'dev/', batch_size, valPercentage)
        devL = dataset.get_labels_according_to_targets(devL, targets)
        assert (len(devD) == len(devL))

        #devD = dataset.normalise_data(devD,mean_std_file,'utterance')
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        print('Training model on ', specType)

        for dropout in drops:       # dropout applied to all inputs of the DNN
            architecture = architectures[0]
            penalty = 0.001         # not actually used at the moment

            for lr in learning_rates:
                #for targets in target_list:
                hyp_str = 'keep_' + str(dropout1) + '_' + str(dropout2) + '_' + str(dropout) + '_' + str(specType) + '_Lr' + str(lr)

                log_dir = tensorboardPath + '/model1_max120epochs_32batch_with_0.85Decay/' + str(specType) + '/' + hyp_str
                model_save_path = modelPath + '/model1_max120epochs_32batch_with_0.85Decay/' + str(specType) + '/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout1, dropout2, dropout, model_save_path, log_dir,
                    logfile, wDecayFlag, penalty, applyBatchNorm, init_type, epochs, batch_size,
                    targets, augment)  #,trainPercentage,valPercentage)
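# dataset.normalise_data(..., 'global_mv') and dataset.compute_global_norm() are defined
# elsewhere in the repo. The sketch below only illustrates the kind of global mean-variance
# normalisation the 'global_mv' mode presumably performs; the function name and the
# mean_std.npz key names ('mean', 'std') are assumptions for illustration, not the actual API.

import numpy as np

def global_mv_normalise_sketch(data, mean_std_file, eps=1e-8):
    # Load global statistics computed once over the training set.
    stats = np.load(mean_std_file)
    mean, std = stats['mean'], stats['std']
    # Standardise every spectrogram with the same global statistics.
    return [(x - mean) / (std + eps) for x in data]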
def trainCNN_on_trainData():
    # CNN training parameters
    activation = 'mfm'          # choose activation: mfm, elu, relu, mfsoftmax, tanh ?
    init_type = 'xavier'        # 'xavier' or 'truncated_normal'
    batch_size = 32
    epochs = 2000

    # Regularizer parameters
    use_lr_decay = False        # set this flag for LR decay
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1
    momentum = 0.95

    #dropout1=1.0   # for input to first FC layer
    #dropout2=1.0   # for intermediate layer input
    drops = [1.0]               # no dropout
    lambdas = [0.001]

    architectures = [1]
    trainingSize = [1]          # in seconds
    lr = 0.0001
    targets = 2

    #specType='mag_spec'   # lets try loading mag_spec
    #inputTypes=['mel_spec']  #'mag_spec' #'cqt_spec'  ## Running on Hepworth !
    #inputTypes=['mag_spec']  # Not run yet
    #inputTypes=['cqt_spec']  # Not run yet
    inputTypes = ['mel_spec', 'mag_spec', 'cqt_spec']   # CQT may throw an error during scoring in model.py

    padding = True
    augment = True
    trainPercentage = 0.8       # each training epoch will see only 80% of the original data at random
    valPercentage = 0.3         # only 30% used at random for validation

    if augment:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms_augmented/1sec_shift/'
    else:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms/'

    # Using the following paths since the scripts were moved into git and symlinked so the code stays synchronised
    tensorboardPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/tensorflow_log_dir/'
    modelPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/models_augmented/'

    #for duration in trainingSize:
    duration = 1

    for specType in inputTypes:
        if specType == 'mel_spec' or specType == 'cqt_spec':
            fftSize = 512
        else:
            fftSize = 256

        print('Now loading the data with FFT size: ', fftSize)
        outPath = spectrogramPath + specType + '/' + str(fftSize) + 'FFT/' + str(duration) + 'sec/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)
        assert (len(tD) == len(tL))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(tD, mean_std_file)

        # Note: both utterance-level and global mean-variance normalisation are applied here, in sequence
        tD = dataset.normalise_data(tD, mean_std_file, 'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')   # global

        # Load dev data, labels and perform norm
        devD, devL = dataset.load_data(outPath + 'dev/')
        devL = dataset.get_labels_according_to_targets(devL, targets)
        assert (len(devD) == len(devL))

        devD = dataset.normalise_data(devD, mean_std_file, 'utterance')
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        print('Training model on ', specType)

        for dropout in drops:       # dropout 1.0 and 0.5 to all inputs of the DNN
            architecture = architectures[0]
            #penalty=0.001   # not actually used at the moment

            for penalty in lambdas:
                #for targets in target_list:
                hyp_str = 'arch' + str(architecture) + '_keep' + str(dropout) + '_' + str(specType) + '_targets' + str(targets)

                log_dir = tensorboardPath + '/model1_max2000epochs_1/' + hyp_str
                model_save_path = modelPath + '/model1_max2000epochs_1/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout, dropout, dropout, model_save_path, log_dir,
                    logfile, wDecayFlag, penalty, applyBatchNorm, init_type, epochs, batch_size,
                    targets, augment, trainPercentage, valPercentage)

                #plot_2dGraph('#Epochs', 'Avg CE Loss', tLoss,vLoss,'train_ce','val_ce', figDirectory+'/loss.png')
                #plot_2dGraph('#Epochs', 'Avg accuracy', tAcc,vAcc,'train_acc','val_acc',figDirectory+'/acc.png')
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc', figDirectory + '/v_ls_acc.png')
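# plot_2dGraph() is another helper defined elsewhere in the repo. Judging from the calls
# above, it takes an x-axis label, a y-axis label, two series, their legend names and an
# output path. Below is a minimal matplotlib-based sketch with a hypothetical name, not
# the repo's actual implementation.

import matplotlib
matplotlib.use('Agg')  # headless plotting on a GPU server (assumption)
import matplotlib.pyplot as plt

def plot_2d_graph_sketch(xlabel, ylabel, series1, series2, label1, label2, savePath):
    # Plot the two curves against the epoch index and save the figure to disk.
    plt.figure()
    plt.plot(range(1, len(series1) + 1), series1, label=label1)
    plt.plot(range(1, len(series2) + 1), series2, label=label2)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig(savePath)
    plt.close()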
def trainCNN_on_handcrafted_features():
    # CNN training parameters
    activation = 'mfm'          # choose activation: mfm, elu, relu, mfsoftmax, tanh ?
    init_type = 'xavier'        # 'xavier' or 'truncated_normal'
    batch_size = 32
    epochs = 200                # 500

    # Regularizer parameters
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1
    momentum = 0.95             # 0.95

    dropout1 = 0.5              # 0.3  # for input to first FC layer
    dropout2 = 0.5              # 0.3  # for intermediate layer input
    drops = [0.5]               # 50% dropout on the inputs of the FC layers
    lambdas = [0.0005, 0.001]

    targets = 2
    architectures = [1]
    trainingSize = [1]          # in seconds
    use_lr_decay = True
    learning_rates = [0.0022, 0.0008, 0.005, 0.0004]
    #learning_rates=np.random.uniform(0.003,0.0005,10)  # lr drawn at random from a uniform distribution between 0.001 and 0.0001
    #learning_rates=np.random.uniform(0.01,0.0001,10)   # Note: LR=0.01 explodes badly; we get the huge cross entropy values we used to get before.

    #inputTypes=['CQCC','LFCC','LPCC','MFCC','RFCC']  #'IMFCC'  # ran this on kapoor 1
    #inputTypes=['IMFCC', 'SCMC']                     # this was running on kapoor 0
    inputTypes = ['RFCC', 'LFCC', 'SCMC', 'IMFCC']    # this is running on kapoor 1

    padding = True
    augment = True
    trainPercentage = 1.0       # fraction of the original data each training epoch sees at random (1.0 = all of it)
    valPercentage = 1.0

    if augment:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/features_1sec_shift/'
    else:
        spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/features/'

    # Using the following paths since the scripts were moved into git and symlinked so the code stays synchronised
    tensorboardPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/tensorflow_log_dir/'
    modelPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/models_augmented/'

    duration = 1
    fftSize = 512

    for specType in inputTypes:
        outPath = spectrogramPath + specType + '/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)
        assert (len(tD) == len(tL))

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(tD, mean_std_file)

        # We will try utterance based norm later
        #tD = dataset.normalise_data(tD,mean_std_file,'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')  # global

        # Optionally take only a fraction of the augmented dev data for validation,
        # just to save some time during training
        #devD,devL = dataset.get_random_data(outPath+'dev/',batch_size,valPercentage)
        devD, devL = dataset.load_data(outPath + 'dev/')
        devL = dataset.get_labels_according_to_targets(devL, targets)
        assert (len(devD) == len(devL))

        #devD = dataset.normalise_data(devD,mean_std_file,'utterance')
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        print('Training model on ', specType)

        for dropout in drops:       # dropout applied to all inputs of the DNN
            architecture = architectures[0]
            penalty = 0.001

            for lr in learning_rates:
                hyp_str = 'keep_' + str(dropout1) + '_' + str(dropout2) + '_' + str(dropout) + '_' + str(specType) + '_lr_' + str(lr)
                print('Hyper-parameter string is: ', hyp_str)

                log_dir = tensorboardPath + '/model1_120max_handcrafted_with_0.85Decay/' + str(specType) + 'drops' + '/' + hyp_str
                model_save_path = modelPath + '/model1_120max_handcrafted_with_0.85Decay/' + str(specType) + 'drops' + '/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                tLoss, vLoss, tAcc, vAcc = model.train(
                    specType, architecture, fftSize, padding, duration, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1, b2, momentum,
                    optimizer_type, dropout1, dropout2, dropout, model_save_path, log_dir,
                    logfile, wDecayFlag, penalty, applyBatchNorm, init_type, epochs, batch_size,
                    targets, augment)  #,trainPercentage,valPercentage)
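# dataset.get_labels_according_to_targets() is defined elsewhere in the repo; the sketch
# below only illustrates the genuine/spoof -> one-hot conversion it appears to perform for
# targets=2 (see the commented-out 'genuine' mapping in spectrograms() above). The function
# name and the exact label strings are assumptions for illustration.

import numpy as np

def labels_to_one_hot_sketch(labels, targets=2):
    # Map 'genuine' to class 1 and everything else (spoofed) to class 0,
    # then expand to a one-hot matrix of shape (num_examples, targets).
    indices = np.array([1 if 'genuine' in str(l) else 0 for l in labels])
    return np.eye(targets)[indices]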
def trainCNN_on_Sparrow_architecture():
    # CNN training parameters
    activation = 'elu'
    init_type = 'xavier'
    batch_size = 32
    epochs = 1000

    # Regularizer parameters
    use_lr_decay = False        # set this flag for LR decay
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1               # 1e-08 is the default
    momentum = 0.95

    dropout1 = 1.0              # for input to first FC layer
    dropout2 = 1.0              # for intermediate layer input
    dropouts = [0.5, 0.4, 0.3, 0.2, 0.1]  #,0.6]
    lambdas = [0.0005, 0.001]

    architectures = [3]         # birds architecture sparrow, to make it unified (check model.py for definition)
    trainingSize = [1]          # in seconds
    learning_rates = [0.0001, 0.00008]
    targets = 2
    fftSize = 256
    specType = 'mag_spec'
    padding = False

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = '../../spectrograms/' + specType + '/' + str(fftSize) + 'FFT/' + str(duration) + 'sec/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)
        dataset.compute_global_norm(tD, mean_std_file)

        print('Shape of labels: ', tL.shape)
        #tD = dataset.normalise_data(tD,mean_std_file,'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')  # global
        #print('Norm td: max and min are ', np.max(tD))

        # Load dev data, labels and perform norm
        devD, devL = dataset.load_data(outPath + 'dev/')
        devL = dataset.get_labels_according_to_targets(devL, targets)
        #devD = dataset.normalise_data(devD,mean_std_file,'utterance')
        #print('first Norm dev: max and min are ', np.max(devD))
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')
        #print('Norm dev: max and min are ', np.max(devD))

        trainSize = str(duration) + 'sec'   ## maybe change this in model.py also !

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        for dropout in dropouts:
            architecture = architectures[0]
            for lr in learning_rates:
                #hyp_str ='cnn'+str(architecture)+'_keepProb_1.0_' + str(dropout)+str(dropout3)+'lr'+str(lr)
                hyp_str = 'sparrow' + '_keep_' + str(dropout) + '_' + 'lr' + str(lr) + '_' + str(activation) + '_' + 'fft' + str(fftSize)

                log_dir = '../tensorflow_log_dir/sparrowArch/' + hyp_str
                model_save_path = '../models/sparrowArch/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                print('Training model with ' + str(duration) + ' sec data and cnnModel' + str(architecture))
                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, trainSize, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1,
                    b2, momentum, optimizer_type, dropout, dropout1, dropout2,
                    model_save_path, log_dir, logfile, wDecayFlag, lossPenalty,
                    applyBatchNorm, init_type, epochs, batch_size, targets)

                #plot_2dGraph('#Epochs', 'Avg CE Loss', tLoss,vLoss,'train_ce','val_ce', figDirectory+'/loss.png')
                #plot_2dGraph('#Epochs', 'Avg accuracy', tAcc,vAcc,'train_acc','val_acc',figDirectory+'/acc.png')
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc', figDirectory + '/v_ls_acc.png')
def trainCNN_on_trainData():
    # CNN training parameters
    activation = 'mfm'          #'elu'
    init_type = 'xavier'
    batch_size = 32
    epochs = 2000  #0

    # Regularizer parameters
    use_lr_decay = False        # set this flag for LR decay
    wDecayFlag = False          # whether to perform L2 weight decay or not
    lossPenalty = 0.001         # using lambda=0.001
    applyBatchNorm = False
    deviceId = "/gpu:0"

    # Adam parameters
    optimizer_type = 'adam'
    b1 = 0.9
    b2 = 0.999
    epsilon = 0.1
    momentum = 0.95

    dropout3 = 1.0              # in the Russian arch there are only FC1 and the output layer, so this is not needed
    #dropout1=[0.3,0.2,0.1]           # we ran this originally ! we will look into this later
    #dropout2=[0.7,0.5,0.4,0.3,0.2]   # for dropout1=0.3, we ran all combinations
    dropout1 = [0.2, 0.1]
    dropout2 = [0.4, 0.2]
    #lambdas = [0.0005, 0.001]
    #dropout1=[1.0]
    #dropout2=[1.0]
    #dropout3=1.0

    architectures = [5]         # Russian architecture is 5
    trainingSize = [4]          # in seconds
    lr = 0.0001
    targets = 2
    fftSize = 2048
    specType = 'mag_spec'
    padding = True

    # Using the following paths since the scripts were moved into git and symlinked so the code stays synchronised
    spectrogramPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/spectrograms/'
    tensorboardPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/tensorflow_log_dir/'
    modelPath = '/homes/bc305/myphd/stage2/deeplearning.experiment1/CNN3/models/'

    for duration in trainingSize:
        print('Now loading the data !!')
        outPath = spectrogramPath + specType + '/' + str(fftSize) + 'FFT/' + str(duration) + 'sec/'
        mean_std_file = outPath + 'train/mean_std.npz'

        # Load training data, labels and perform norm
        tD, tL = dataset.load_data(outPath + 'train/')
        tL = dataset.get_labels_according_to_targets(tL, targets)

        if not os.path.exists(mean_std_file):
            print('Computing Mean_std file ..')
            dataset.compute_global_norm(tD, mean_std_file)

        print('Shape of labels: ', tL.shape)
        #tD = dataset.normalise_data(tD,mean_std_file,'utterance')   # utterance level
        tD = dataset.normalise_data(tD, mean_std_file, 'global_mv')  # global

        # Load dev data, labels and perform norm
        devD, devL = dataset.load_data(outPath + 'dev/')
        devL = dataset.get_labels_according_to_targets(devL, targets)
        #devD = dataset.normalise_data(devD,mean_std_file,'utterance')
        devD = dataset.normalise_data(devD, mean_std_file, 'global_mv')

        ### We are training on the TRAIN set and validating on the DEV set
        t_data = tD
        t_labels = tL
        v_data = devD
        v_labels = devL

        for dropout in dropout1:
            architecture = architectures[0]
            for drop in dropout2:
                hyp_str = 'arch' + str(architecture) + '_keep_' + str(dropout) + '_' + str(drop) + '_' + str(duration) + 'sec'

                log_dir = tensorboardPath + '/rusCNN_max2000epochs/' + hyp_str
                model_save_path = modelPath + '/rusCNN_max2000epochs/' + hyp_str
                logfile = model_save_path + '/training.log'
                figDirectory = model_save_path
                makeDirectory(model_save_path)

                print('Training model with ' + str(duration) + ' sec data and cnnModel' + str(architecture))
                tLoss, vLoss, tAcc, vAcc = model.train(
                    architecture, fftSize, padding, duration, t_data, t_labels,
                    v_data, v_labels, activation, lr, use_lr_decay, epsilon, b1,
                    b2, momentum, optimizer_type, dropout, drop, dropout3,
                    model_save_path, log_dir, logfile, wDecayFlag, lossPenalty,
                    applyBatchNorm, init_type, epochs, batch_size, targets)

                #plot_2dGraph('#Epochs', 'Avg CE Loss', tLoss,vLoss,'train_ce','val_ce', figDirectory+'/loss.png')
                #plot_2dGraph('#Epochs', 'Avg accuracy', tAcc,vAcc,'train_acc','val_acc',figDirectory+'/acc.png')
                plot_2dGraph('#Epochs', 'Val loss and accuracy', vLoss, vAcc,
                             'val_loss', 'val_acc', figDirectory + '/v_ls_acc.png')
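# How these training scripts are actually launched is not shown in this snippet; a typical
# entry point (an assumption, not the repo's original launcher) would simply call one of
# the training functions defined above, e.g.:

if __name__ == '__main__':
    trainCNN_on_trainData()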
def other_features(input_type, data_list, labelFile, savePath, fft_size, win_size, hop_size,
                   duration, data_window=100, window_shift=10, augment=True, save=True,
                   minimum_length=1):
    '''
    TODO: merge this function with spectrograms(); it can replace spectrogram function #1.
    '''
    from audio import compute_spectrogram

    spectrograms = list()
    labels = list()

    if input_type == 'others':
        # Load the npz file, which in this case is data_list
        print('Loading the features..')
        spectrograms = np.load(data_list)['features']
        print('Length is: ', len(spectrograms))

        # Make these features unified across the time dimension
        spectrograms = [update_feature_matrix(matrix) for matrix in spectrograms]
    else:
        print('Computing the ' + input_type + ' spectrograms !!')
        with open(data_list, 'r') as f:
            spectrograms = [
                compute_spectrogram(input_type, file.strip(), fft_size, win_size,
                                    hop_size, duration, augment, minimum_length)
                for file in f
            ]

    # Get the labels into a list and save it along with the spectrograms
    with open(labelFile, 'r') as f:
        labels = [line.strip() for line in f]

    if augment:
        new_data = list()
        new_labels = list()
        assert (len(labels) == len(spectrograms))

        print('Now performing augmentation using sliding window mechanism on original specs/features .... ')
        for i in range(len(spectrograms)):
            d, l = augment_data(spectrograms[i], labels[i], data_window,
                                input_type, window_shift)
            new_data.extend(d)  # extend the list rather than adding it into a new list
            new_labels.extend(l)

        spectrograms = new_data
        labels = new_labels

    if save:
        from helper import makeDirectory
        makeDirectory(savePath)
        outfile = savePath + '/spec'
        # np.savez takes the output path directly (it appends .npz itself),
        # so no explicit open() is needed here.
        np.savez(outfile, spectrograms=spectrograms, labels=labels)

    print('Finished computing features/spectrograms and saved inside: ', savePath)
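# update_feature_matrix() is defined elsewhere in the repo; the sketch below only shows one
# plausible way to "unify" hand-crafted feature matrices across the time dimension: truncate
# long utterances and tile short ones to a fixed number of frames. The function name, the
# target_frames default and the (frames, coefficients) orientation are assumptions.

import numpy as np

def update_feature_matrix_sketch(matrix, target_frames=100):
    matrix = np.asarray(matrix)
    n_frames = matrix.shape[0]
    if n_frames >= target_frames:
        # Long utterance: keep only the first target_frames frames.
        return matrix[:target_frames]
    # Short utterance: repeat the matrix along time until it is long enough, then crop.
    repeats = int(np.ceil(target_frames / float(n_frames)))
    return np.tile(matrix, (repeats, 1))[:target_frames]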