def run_model(Xlist_train, ylist_train, Xlist_val, ylist_val):
    """Train the module-level `model` from generator-fed data and save it.

    :param Xlist_train: file/ID list for the training inputs
    :param ylist_train: file/ID list for the training targets
    :param Xlist_val: file/ID list for the validation inputs
    :param ylist_val: file/ID list for the validation targets
    :return: the Keras History object returned by training

    Relies on module-level ``model``, ``npix``, ``batch_size``, ``n_nu``,
    ``n_stokes``, ``feature_scale``, ``epochs`` and ``callbacks_list``.
    """
    generator_kwargs = {
        'dim': (npix, npix),
        'batch_size': batch_size,
        'n_nu': n_nu,
        'n_stokes': n_stokes,
        'feature_scale': feature_scale,
    }

    training_source = DataGenerator(Xlist_train, ylist_train, **generator_kwargs)
    validation_source = DataGenerator(Xlist_val, ylist_val, **generator_kwargs)

    history = model.fit_generator(
        generator=training_source,
        validation_data=validation_source,
        epochs=epochs,
        callbacks=callbacks_list,
        use_multiprocessing=True,
        workers=10,
    )

    # Persist the fully trained network to the working directory.
    model.save('final_model.h5')
    return history
def __init__(self, optmizer_index, activation_index, DNN_layers, DNN_neurons, dropout, batch_size, epochs, n_labels=3): self.activation = [ 'relu', 'selu' ] #currently using relu, threw in selu because Thea had it, but may want to replace with something else after more thinking self.optimizer = [ 'adam', 'nadam', 'adadelta' ] #currently using adam, also just plucked these from Thea, might want to change options after some more thinking self.optimizer_index = optmizer_index self.activation_index = activation_index self.DNN_layers = DNN_layers self.DNN_neurons = DNN_neurons self.dropout = dropout self.batch_size = batch_size self.epochs = epochs #self.n_labels = n_labels # n_labels = 3 training_generator = DataGenerator( partition['train'], **params ) #this should return X, Y where X is for the features in the training sample, Y for the labels in the training sample validation_generator = DataGenerator( partition['validation'], **params ) # this should return X,Y where X is for features in the test sample, Y is for the labels in the training sample #Do I need the self? --> I think what I did below is reasonable self.generator = training_generator self.validation_data = validation_generator #Also copied from training script #X_train, Y_train = training_generator #Is this ok? # this is giving me a too many values to unpack error #X_test, Y_test = validation_generator #Same comment Ok, maybe I do not need this and it is just making things screwy... self.n_labels = (training_generator.num_of_labels) # should be 3 self.input_shape = (training_generator.dim, ) #should be 8 # print(self.n_labels) # print (self.input_shape) #self.n_labels = 3 #self.input_shape = (8,) #self.__y_test = Y_test #self.__y_train = Y_train #self.__x_test = X_test #self.__x_train = X_train ###### End block of skepticism #### self.__model = self.build()
def __init__(self, optmizer_index, activation_index, DNN_layers, DNN_neurons, dropout, batch_size, epochs, batch_norm_index, n_labels): #this last part with batch norm index might be incorrect
    """Store hyper-parameters and build the network.

    :param optmizer_index: index into self.optimizer (NOTE(review): name is a
        typo for `optimizer_index`; kept for keyword-argument compatibility)
    :param activation_index: index into self.activation
    :param DNN_layers: number of dense layers
    :param DNN_neurons: neurons per dense layer
    :param dropout: dropout rate
    :param batch_size: training batch size
    :param epochs: number of training epochs
    :param batch_norm_index: batch-normalisation selector (per author comment,
        possibly incorrect)
    :param n_labels: number of output labels

    NOTE(review): relies on module-level `partition` and `params` — confirm.
    """
    self.activation = ['relu', 'selu'] #currently using relu, threw in selu because Thea had it, but may want to replace with something else after more thinking
    self.optimizer = ['adam', 'nadam','adadelta'] #currently using adam, also just plucked these from Thea, might want to change options after some more thinking
    self.optimizer_index = optmizer_index
    self.activation_index = activation_index
    self.DNN_layers = DNN_layers
    self.DNN_neurons = DNN_neurons
    self.dropout = dropout
    self.batch_size = batch_size
    self.epochs = epochs
    self.batch_norm_index = batch_norm_index
    self.n_labels = n_labels
    ### Skeptical about this being right, sorry, I clearly did not quite get it when we met, sorry to be slow ####
    # NOTE(review): the two generators below are created but never stored on
    # self nor used in this method — likely dead code or unfinished wiring.
    training_generator = DataGenerator(partition['train'], **params) #this should return X, Y where X is for the features in the training sample, Y for the labels in the training sample
    validation_generator = DataGenerator(partition['validation'], **params) # this should return X,Y where X is for features in the test sample, Y is for the labels in the training sample
    #now need to tell it something about the input shape...will FIX THIS when I figure out if I'm giving features_train, features_test and labels_train, labels_test correctly
    ###### End block of skepticism ####
    self.__model = self.build()
def model(image=False, audio=False, text=False):
    """
    Train all 3 models

    :param image: Whether or not to train the image model on this run
    :param audio: Whether or not to train the audio model on this run
    :param text: Whether or not to train the text model on this run
    :return: None; trained models are written under ../output/
    """
    if image:
        # Parameters for the video-frame data generator.
        params = {
            'dim': (10, 224, 224),
            'batch_size': 16,
            'n_channels': 3,
            'shuffle': True
        }

        # Load labels set
        with open('../data/image_data/pickle_files/y_5d_training.pkl', 'rb') as file:
            training_labels = pickle.load(file)
        with open('../data/image_data/pickle_files/y_5d_test.pkl', 'rb') as file:
            test_labels = pickle.load(file)

        # Generators
        training_generator = DataGenerator(partition='training', list_IDs=range(6000), labels=training_labels, **params)
        validation_generator = DataGenerator(partition='test', list_IDs=range(2000), labels=test_labels, **params)

        # Create model
        model = models.image_lrcn()

        # Train model on data set.
        # NOTE(review): fit_generator is deprecated in modern Keras in favour
        # of Model.fit — worth migrating when the stack is upgraded.
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            use_multiprocessing=True,
                            workers=6,
                            epochs=5)
        model.save_weights('../output/image_model.h5')

    if audio:
        # Read in aduio data
        training_set = pd.read_csv('../data/audio_data/pickle_files/training_df.csv')
        test_set = pd.read_csv('../data/audio_data/pickle_files/test_df.csv')

        # Concat data sets in order to use all data for CV
        all_data = pd.concat((training_set, test_set), axis=0)
        X_all = all_data.drop(['interview_score', 'video_id'], axis=1)
        y_all = all_data['interview_score']

        logging.info('Start training audio model')

        # Create model and fit to data. The attributes used below
        # (best_params_, cv_results_) imply audio_rand_forest() returns a
        # CV-search wrapper (e.g. GridSearchCV) — confirm in models module.
        audio_model = models.audio_rand_forest()
        audio_model.fit(X_all, y_all)
        logging.info(audio_model.best_params_)
        logging.info('Train score with best estimator: {}'.format(
            max(audio_model.cv_results_['mean_train_score'])))
        logging.info('Test score with best estimator: {}'.format(
            max(audio_model.cv_results_['mean_test_score'])))

        # Save to disk
        with open('../output/audio_model.pkl', 'wb') as fid:
            pickle.dump(audio_model, fid)

    if text:
        # Load in word embeddings
        embedding_matrix, word_to_index = resources.create_embedding_matrix()

        # Load text data
        with open('../data/text_data/pickle_files/X_training.pkl', 'rb') as file:
            X_train = pickle.load(file)
        with open('../data/text_data/pickle_files/y_training.pkl', 'rb') as file:
            y_train = pickle.load(file)
        with open('../data/text_data/pickle_files/X_test.pkl', 'rb') as file:
            X_test = pickle.load(file)
        with open('../data/text_data/pickle_files/y_test.pkl', 'rb') as file:
            y_test = pickle.load(file)

        # Create model object and fit; the checkpoint keeps only the weights
        # with the lowest validation loss at ../output/text_model.h5.
        text_model = models.text_lstm_model(embedding_matrix=embedding_matrix)
        filename = '../output/text_model.h5'
        checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        text_model.fit(X_train,
                       y_train,
                       batch_size=32,
                       epochs=55,
                       validation_data=(X_test, y_test),
                       callbacks=[checkpoint],
                       shuffle=True)
    pass
def score_new_vid():
    """Run the full scoring pipeline on the 'score' partition.

    Extracts and transforms image/audio/text features, loads the three
    trained base models plus the ensemble model, predicts per-modality
    scores, merges them by video id, applies the ensemble, and writes
    ../output/predictions.csv. Returns None.
    """
    logging.info('Begin extraction for scoring partition')
    # Extract features from vids
    lib.extract_images(partition='score', num_frames=10)
    lib.extract_audio(partition='score')
    lib.extract_text(partition='score', training=False)

    logging.info('Begin transformation for scoring partition')
    # Transform features
    embedding_matrix, word_to_index = resources.create_embedding_matrix()
    lib.transform_images(partition='score', num_frames=10, training=False)
    lib.transform_audio(partition='score', n_mfcc=13, training=False)
    lib.transform_text(partition='score', word_to_index=word_to_index, training=False)

    logging.info('Load models for evaluation of the scoring partition')
    # Load models
    image_model = models.image_lrcn()
    image_model.load_weights('../output/image_model.h5')
    audio_model = pickle.load(open('../output/audio_model.pkl', 'rb'))
    text_model = load_model('../output/text_model.h5')
    ensemble_model = pickle.load(open('../output/ensemble_model.pkl', 'rb'))

    logging.info('Load transformed data')
    # Load image data
    with open('../data/image_data/pickle_files/vid_ids_5d_score.pkl', 'rb') as file:
        id_img_score = pickle.load(file)

    # Load audio data
    aud_to_score = pd.read_csv('../data/audio_data/pickle_files/score_df.csv')
    X_aud_score = aud_to_score.drop(['video_id'], axis=1)
    id_aud_score = aud_to_score['video_id']

    # Load text data
    with open('../data/text_data/pickle_files/X_score.pkl', 'rb') as file:
        X_text_score = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_score.pkl', 'rb') as file:
        id_text_score = pickle.load(file)

    # Load generator.
    # Labels are all-zero dummies (prediction only) and batch_size equals the
    # whole set, so a single batch covers everything; shuffle=False keeps
    # predictions aligned with the id lists.
    # NOTE(review): partition='training' looks odd when scoring new videos —
    # verify which directory the generator maps this to.
    score_generator = DataGenerator(
        partition='training',
        list_IDs=range(len(id_aud_score)),
        labels=[0 for i in range(len(id_aud_score))],
        batch_size=len(id_aud_score),
        n_channels=3,
        dim=(10, 224, 224),
        shuffle=False)

    logging.info('Predict values with image, text and audio models')
    # Predict values: one DataFrame per modality, keyed by video id.
    img_score_df = pd.DataFrame({
        'img_preds': [i[0] for i in image_model.predict_generator(score_generator)],
        'video_ids': id_img_score
    })
    aud_score_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_score),
        'video_ids': id_aud_score
    })
    text_score_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_score)],
        'video_ids': id_text_score
    })

    logging.info('Make final predictions')
    # Merge predictions
    score_preds = img_score_df.merge(aud_score_df, on='video_ids')
    score_preds = score_preds.merge(text_score_df, on='video_ids')

    # Make final prediction with the ensemble over the three base predictions.
    X_score = score_preds[['img_preds', 'aud_preds', 'text_preds']]
    score_preds['final_prediction'] = ensemble_model.predict(X_score)

    # Save predictions to disk
    score_preds.to_csv('../output/predictions.csv', index=False)
    pass
def ensemble():
    """Stack the image, audio and text models with an OLS meta-model.

    Loads the three trained base models, predicts interview scores on the
    training/test/validation splits, reports per-model RMSEs, fits a
    LinearRegression over the three prediction columns, reports its RMSEs,
    and pickles it to ../output/ensemble_model.pkl. Returns None.
    """
    logging.info('Begin Ensemble model building, loading models')
    # Load models
    image_model = models.image_lrcn()
    image_model.load_weights('../output/image_model.h5')
    audio_model = pickle.load(open('../output/audio_model.pkl', 'rb'))
    text_model = load_model('../output/text_model.h5')

    # Load labels set
    with open('../data/image_data/pickle_files/y_5d_training.pkl', 'rb') as file:
        training_labels = pickle.load(file)
    with open('../data/image_data/pickle_files/y_5d_test.pkl', 'rb') as file:
        test_labels = pickle.load(file)

    # Load generators (shuffle=False keeps predictions aligned with the id
    # lists loaded below).
    # NOTE(review): holdout_generator reuses `test_labels` for the
    # 'validation' partition — labels are not used for prediction, but
    # confirm this is intentional.
    training_generator = DataGenerator(partition='training', list_IDs=range(6000), labels=training_labels, batch_size=16, n_channels=3, dim=(10, 224, 224), shuffle=False)
    validation_generator = DataGenerator(partition='test', list_IDs=range(2000), labels=test_labels, batch_size=16, n_channels=3, dim=(10, 224, 224), shuffle=False)
    holdout_generator = DataGenerator(partition='validation', list_IDs=range(2000), labels=test_labels, batch_size=16, n_channels=3, dim=(10, 224, 224), shuffle=False)

    logging.info('Load data files')
    # Load image data
    with open('../data/image_data/pickle_files/y_training.pkl', 'rb') as file:
        y_img_train = pickle.load(file)
    with open('../data/image_data/pickle_files/y_test.pkl', 'rb') as file:
        y_img_test = pickle.load(file)
    with open('../data/image_data/pickle_files/y_validation.pkl', 'rb') as file:
        y_img_val = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_training.pkl', 'rb') as file:
        id_img_train = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_test.pkl', 'rb') as file:
        id_img_test = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_validation.pkl', 'rb') as file:
        id_img_val = pickle.load(file)

    # Load audio data
    aud_train = pd.read_csv('../data/audio_data/pickle_files/training_df.csv')
    aud_test = pd.read_csv('../data/audio_data/pickle_files/test_df.csv')
    aud_val = pd.read_csv('../data/audio_data/pickle_files/validation_df.csv')
    X_aud_train = aud_train.drop(['interview_score', 'video_id'], axis=1)
    id_aud_train = aud_train['video_id']
    X_aud_test = aud_test.drop(['interview_score', 'video_id'], axis=1)
    id_aud_test = aud_test['video_id']
    X_aud_val = aud_val.drop(['interview_score', 'video_id'], axis=1)
    id_aud_val = aud_val['video_id']

    # Load text data
    with open('../data/text_data/pickle_files/X_training.pkl', 'rb') as file:
        X_text_train = pickle.load(file)
    with open('../data/text_data/pickle_files/X_test.pkl', 'rb') as file:
        X_text_test = pickle.load(file)
    with open('../data/text_data/pickle_files/X_validation.pkl', 'rb') as file:
        X_text_val = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_training.pkl', 'rb') as file:
        id_text_train = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_test.pkl', 'rb') as file:
        id_text_test = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_validation.pkl', 'rb') as file:
        id_text_val = pickle.load(file)

    logging.info('Getting predictions for all 3 models')
    # Get predictions: one DataFrame per model per split, keyed by video id.
    img_train_df = pd.DataFrame({
        'img_preds': [i[0] for i in image_model.predict_generator(training_generator)],
        'video_ids': id_img_train,
        'interview_score': y_img_train
    })
    img_test_df = pd.DataFrame({
        'img_preds': [i[0] for i in image_model.predict_generator(validation_generator)],
        'video_ids': id_img_test,
        'interview_score': y_img_test
    })
    img_val_df = pd.DataFrame({
        'img_preds': [i[0] for i in image_model.predict_generator(holdout_generator)],
        'video_ids': id_img_val,
        'interview_score': y_img_val
    })
    aud_train_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_train),
        'video_ids': id_aud_train
    })
    aud_test_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_test),
        'video_ids': id_aud_test
    })
    aud_val_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_val),
        'video_ids': id_aud_val
    })
    text_train_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_train)],
        'video_ids': id_text_train
    })
    text_test_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_test)],
        'video_ids': id_text_test
    })
    text_val_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_val)],
        'video_ids': id_text_val
    })

    logging.info('Merge predictions together into single data frame')
    # Merge predictions
    train_preds = img_train_df.merge(aud_train_df, on='video_ids')
    train_preds = train_preds.merge(text_train_df, on='video_ids')
    test_preds = img_test_df.merge(aud_test_df, on='video_ids')
    test_preds = test_preds.merge(text_test_df, on='video_ids')
    val_preds = img_val_df.merge(aud_val_df, on='video_ids')
    val_preds = val_preds.merge(text_val_df, on='video_ids')

    # Score models: RMSE of each base model on each split.
    img_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'], train_preds['img_preds']))
    img_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'], test_preds['img_preds']))
    img_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'], val_preds['img_preds']))
    aud_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'], train_preds['aud_preds']))
    aud_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'], test_preds['aud_preds']))
    aud_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'], val_preds['aud_preds']))
    text_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'], train_preds['text_preds']))
    text_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'], test_preds['text_preds']))
    text_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'], val_preds['text_preds']))

    # Print scores to screen
    logging.info('Image score on the training set: {}'.format(img_train_score))
    logging.info('Image score on the test set: {}'.format(img_test_score))
    logging.info('Image score on the val set: {}'.format(img_val_score))
    logging.info('Audio score on the training set: {}'.format(aud_train_score))
    logging.info('Audio score on the test set: {}'.format(aud_test_score))
    logging.info('Audio score on the val set: {}'.format(aud_val_score))
    logging.info('Text score on the training set: {}'.format(text_train_score))
    logging.info('Text score on the test set: {}'.format(text_test_score))
    logging.info('Text score on the val set: {}'.format(text_val_score))

    # Split target variable and features
    X_train = train_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_train = train_preds[['interview_score']]
    X_test = test_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_test = test_preds[['interview_score']]
    X_val = val_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_val = val_preds[['interview_score']]

    logging.info('Build OLS model to combine model outputs')
    # Build OLS model stacking the three per-model predictions.
    ols_model = LinearRegression()
    ols_model.fit(X_train, y_train)

    # Score model
    train_score = np.sqrt(mean_squared_error(y_train, ols_model.predict(X_train)))
    test_score = np.sqrt(mean_squared_error(y_test, ols_model.predict(X_test)))
    val_score = np.sqrt(mean_squared_error(y_val, ols_model.predict(X_val)))
    logging.info('OLS Score on training set: {}'.format(train_score))
    logging.info('OLS Score on test set: {}'.format(test_score))
    logging.info('OLS Score on val set: {}'.format(val_score))

    # Save model
    with open('../output/ensemble_model.pkl', 'wb') as fid:
        pickle.dump(ols_model, fid)
    logging.info('Ensemble model saved')
    return
from keras.models import Sequential
from my_classes import DataGenerator

# Template script: feed a Keras model from a custom DataGenerator.

# Parameters forwarded to DataGenerator.
params = {'dim_x': 32,
          'dim_y': 32,
          'dim_z': 32,
          'batch_size': 32,
          'shuffle': True}

# Datasets — fill these in for your own data.
# BUG FIX: the original placeholder lines (`partition = # IDs`) were not
# valid Python; give them empty-but-valid defaults.
partition = {'train': [], 'validation': []}  # IDs, e.g. {'train': ['id-1', 'id-2'], 'validation': ['id-4']}
labels = {}  # Labels keyed by ID, e.g. {'id-1': 0, 'id-2': 1, 'id-4': 1}

# Generators
training_generator = DataGenerator(**params).generate(labels, partition['train'])
validation_generator = DataGenerator(**params).generate(labels, partition['validation'])

# Design model
model = Sequential()
[...]  # Architecture: add layers here
model.compile()  # supply an optimizer and loss before training

# Train model on dataset.
# BUG FIX: the original divided by an undefined module-level `batch_size`;
# use the value already recorded in `params`.
model.fit_generator(generator=training_generator,
                    steps_per_epoch=len(partition['train']) // params['batch_size'],
                    validation_data=validation_generator,
                    validation_steps=len(partition['validation']) // params['batch_size'])
with open('/data/data1/users/konpyro/text_feats/ids.pkl', 'rb') as file: text_ids = pkl.load(file) # Parameters params = { 'dim': (60, 431), 'batch_size': 16, 'n_classes': 4, 'n_channels': 6, 'shuffle': False } epochs = 10 # Generators generator = DataGenerator(ids, labels, **params) # Late fuse model model_A1 = load_model('/data/data1/users/konpyro/model_A1.h5') #model_T2 = tf.keras.models.load_model('/data/data1/users/konpyro/model_T2.h5', custom_objects={'BertLayer': bertlayer.BertLayer}) # load json and create model json_file = open('/data/data1/users/konpyro/model.json', 'r') loaded_model_json = json_file.read() json_file.close() model_T2 = model_from_json(loaded_model_json, custom_objects={'BertLayer': bertlayer.BertLayer}) # load weights into new model model_T2.load_weights("/data/data1/users/konpyro/model_weights.h5") print("Loaded model from disk") input_shape = (8, )
def cnn_model(pickle_file_path):
    """Train a CNN to separate RFI from Furby (simulated FRB) pickles.

    :param pickle_file_path: directory containing *rfi_*.pkl / *fur*.pkl
        files; also used as the output directory for checkpoints and the
        serialized model architecture.

    The last 483 files of each class are held out for validation; labels are
    RFI=0, Furby=1. NOTE(review): the original source was collapsed to one
    line, so the indentation below (in particular the extent of the layer
    loop inside create_cnn) is reconstructed — verify against the intended
    architecture.
    """
    print("Reached cnn model")
    params = {'dim': (15250, 320, 5),
              'batch_size': 10,
              'n_classes': 2,
              'n_channels': 1,
              'shuffle': True}
    RFI_files = glob.glob(pickle_file_path + '*rfi_*.pkl')
    Furby_files = glob.glob(pickle_file_path + '*fur*.pkl')
    train = []
    validation = []
    labels = []
    # All-but-last-483 of each class for training, last 483 for validation.
    train = RFI_files[:-483]
    print(np.shape(train))
    train = np.concatenate((train, Furby_files[:-483]), axis = 0)
    print(np.shape(train))
    validation = RFI_files[-483:]
    print(np.shape(validation))
    validation = np.concatenate((validation, Furby_files[-483:]), axis = 0)
    print(np.shape(validation))
    # Labels: RFI -> 0, Furby -> 1, keyed by file path.
    labels_RFI = np.zeros(np.shape(RFI_files))
    labels_fur = np.ones(np.shape(Furby_files))
    labels = np.concatenate((labels_RFI, labels_fur), axis = 0)
    # print(np.concatenate((RFI_files, Furby_files), axis = 0)[0])
    partition = {'train': train, 'validation': validation}
    label = dict(zip(np.concatenate((RFI_files, Furby_files), axis = 0), labels))
    # print(label)
    print(np.shape(label))
    print(np.shape(partition['train']))
    print(np.shape(partition['validation']))

    # Generators
    training_generator = DataGenerator(partition['train'], label, **params)
    validation_generator = DataGenerator(partition['validation'], label, **params)

    # le = LabelEncoder()
    # yy = to_categorical(le.fit_transform(y))
    # X_train, X_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)
    # print("split the data into training and test sets")
    # def correct_dim_cnn(array):
    #     output=[]
    #     for i in array:
    #         print(np.shape(i))
    #         output.append(i.reshape(15250, 320, 5))
    #     return output
    # X_train = correct_dim_cnn(X_train)
    # X_test = correct_dim_cnn(X_test)
    # print(np.shape(X_train))
    # print(np.shape(X_test))
    # print(y_test)
    # print(y_train)

    def create_cnn(height, width, depth, filters=(250, 32, 5), regress=False):
        # Build the convolutional backbone: one Conv/ReLU/Pool/BN block per
        # entry in `filters`, then a small dense head. `regress` is accepted
        # but unused in the visible code.
        HWD = (height, width, depth)
        print(HWD)
        Dim = -1  # channels-last BatchNormalization axis
        inputs = Input(shape = HWD)
        for (i, f) in enumerate(filters):
            if i == 0:
                x = inputs
            x = Conv2D(f, (3, 3), padding="valid")(x)
            x = Activation("relu")(x)
            x = MaxPooling2D(pool_size=(2, 2))(x)
            x = BatchNormalization(axis=Dim)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=Dim)(x)
        x = Dropout(0.5)(x)
        x = Dense(4)(x)
        x = Activation("relu")(x)
        model = Model(inputs, x)
        return model

    cnn_model = create_cnn(15250, 320, 5, regress=False)
    cnn_model.summary()
    num_labels = 2
    # Attach a 2-way softmax classification head on top of the backbone.
    x = Dense(4, activation="relu")(cnn_model.output)
    x = Dense(num_labels, activation="softmax")(x)
    model = Model(inputs=[cnn_model.input], outputs=x)
    run_opts = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom = True)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001))
    model.summary()
    num_epochs = 100
    num_batch_size = 10
    checkpointer = ModelCheckpoint(filepath= pickle_file_path +'weights.best.ABC_returns_learning_rate_0.001_batch_150_epochs_100.hdf5', verbose=1, save_best_only=True)
    start = datetime.now()
    # with tf.compat.v1.Session( config = tf.compat.v1.ConfigProto( log_device_placement = True ) ):
    model.fit_generator(generator=training_generator, validation_data=validation_generator)
    # model.fit_generator(generator=training_generator,
    #                     validation_data=validation_generator,
    #                     use_multiprocessing=True,
    #                     workers=6, callbacks=[checkpointer], verbose=1)
    # model.fit_generator(generator=training_generator, validation_data=validation_generator)
    #model.fit([X_train], y_train, validation_data = ([X_test], y_test), batch_size=num_batch_size, epochs=num_epochs, callbacks=[checkpointer], verbose=1)
    duration = datetime.now() - start
    print("Training completed in time: ", duration)
    # NOTE(review): x_freq_time_test and y_test are not defined in this
    # function — confirm they exist at module level. Also, the model was
    # compiled without metrics, so evaluate() returns the loss, not an
    # accuracy, despite the label printed below.
    score = model.evaluate([x_freq_time_test], y_test, verbose=1)
    accuracy = 100*score
    print("Accuracy on validation set: %.4f%%" % accuracy)
    # Serialise the architecture (weights are saved via the checkpointer).
    model_json = model.to_json()
    with open(pickle_file_path + "model_ABC_returns_learning_rate_0.001_batch_150_epochs_100.json", "w") as json_file:
        json_file.write(model_json)
def generator_main():
    """Train a spectrogram CNN on whale-call data fed by DataGenerator.

    Builds the train/validation split via create_dirs, constructs a 5-block
    conv net over (48, 126, 1) spectrograms, trains with fit_generator,
    saves 'whale_small_1.h5' and plots accuracy/loss curves.

    NOTE(review): the Convolution2D/border_mode/dim_ordering/mode arguments
    below are Keras 1.x API — this function requires an old Keras version.
    """
    #original_dataset_dir = 'C:\\svn\\dwatts\\dev\\datasets\\whale_data\\data'
    #original_train_dataset_dir = 'C:\\svn\\dwatts\\dev\\datasets\\whale_data\\data\\train'
    #base_dir = 'C:\\svn\\dwatts\\dev\\dl_with_python\\whale_small\\'
    original_dataset_dir = '/home/david/data/whale_data/data'
    original_train_dataset_dir = '/home/david/data/whale_data/data/train'
    base_dir = '/home/david/dev/deepLearningForPython/whale_small/'

    # spectrogram parameters
    params = {'batch_size': 64,
              'dim': (48,126),
              'n_channels': 1,
              'n_classes': 2,
              'shuffle': True,
              'NFFT':64,
              'Fs':2000,
              'noverlap':32}

    # Number of time slice metrics
    maxTime = 126
    Ntrain = 5000
    Nval = 500
    Ntest = 500
    train_loc = 'whale_small/train'
    val_loc = 'whale_small/validation'

    # load data, parition and labels:
    # e.g. {'train': ['id-1', 'id-2', 'id-3'], 'validation': ['id-4']}
    # e.g. {'id-1': 0, 'id-2': 1, 'id-3': 2, 'id-4': 1}
    #train = fileio.TrainData(original_dataset_dir+'\\train.csv',base_dir+'train')
    partition, labels = create_dirs(Ntrain, Nval, Ntest, original_dataset_dir, base_dir)

    # Generators
    training_generator = DataGenerator(partition['train'], labels, train_loc, **params)
    validation_generator = DataGenerator(partition['validation'], labels, val_loc, **params)

    # Pull one batch to sanity-check shapes before building the network.
    for data_batch, labels_batch in training_generator:
        print('data batch shape:', data_batch.shape)
        print('labels batch shape:', labels_batch.shape)
        break

    # Determine proper input shape
    input_shape = (params['dim'][0], params['dim'][1], params['n_channels'])
    '''
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(2, activation='softmax'))
    '''

    melgram_input = Input(shape=input_shape)

    # Only tf dimension ordering
    channel_axis = 3
    freq_axis = 1
    time_axis = 2

    # Input block: normalise across the frequency axis.
    x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(melgram_input)

    # Conv block 1  (each block: Conv -> BN -> ELU -> MaxPool)
    x = Convolution2D(64, 3, 3, border_mode='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 4), dim_ordering="th", name='pool1')(x)
    # Conv block 2
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 4), dim_ordering="th", name='pool2')(x)
    # Conv block 3
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 4), dim_ordering="th", name='pool3')(x)
    # Conv block 4
    x = Convolution2D(128, 3, 3, border_mode='same', name='conv4')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 5), dim_ordering="th", name='pool4')(x)
    # Conv block 5
    x = Convolution2D(64, 3, 3, border_mode='same', name='conv5')(x)
    x = BatchNormalization(axis=channel_axis, mode=0, name='bn5')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), dim_ordering="th", name='pool5')(x)

    # Output: flatten into a small dense head with a 2-way softmax.
    x = Flatten()(x)
    x = Dense(50, activation='relu', name='hidden1')(x)
    x = Dense(2, activation='softmax', name='output')(x)

    # Create model
    model = Model(melgram_input, x)
    model.summary()

    from keras import optimizers

    # Compile the model
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizers.RMSprop(lr=1e-4),
        metrics=['acc'])

    # Train model on dataset
    history = model.fit_generator(
        training_generator,
        steps_per_epoch=100,
        epochs=10,
        validation_data=validation_generator,
        validation_steps=50)
        #use_multiprocessing=True,
        #workers=1)

    model.save('whale_small_1.h5')

    import matplotlib.pyplot as plt

    # Plot accuracy and loss curves for training vs validation.
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()
    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()
# NOTE(review): this chunk begins mid-loop — the `for` header driving the
# first statement (accumulating the training partitions) is outside the
# visible region; its indentation here is a best guess.
train_list += partition_dict[partition]
# Accumulate the test-partition IDs the same way.
for partition in model_params['test_partitions']:
    test_list += partition_dict[partition]
model_params['train_len'] = len(train_list)
model_params['test_len'] = len(test_list)

# specify generator parameters
params = {'dim': (model_params['num_rows'],model_params['num_cols']),
          'batch_size': model_params['batch_size'],
          'n_classes': model_params['num_classes'],
          'n_channels': model_params['channels'],
          'shuffle': True,
          'scale_image': model_params['preprocess_input']
          }

# instantiate the generators (third positional argument toggles augmentation:
# on for training, off for test)
train_generator = DataGenerator(train_list, labels_dict, model_params['augmentation'], **params)
test_generator = DataGenerator(test_list, labels_dict, False, **params)

# log details
log_model_details(model, model_params)

# for convenience
model_rev_name = model_params['model_name'] + ' ' + model_params['rev']
mode = model_params['run_mode']

if mode == 'train' or mode == 'train-eval':
    # train the model on the new data for specified epochs
    model.fit_generator(generator=train_generator,
                        epochs=model_params['training_epochs'],
                        use_multiprocessing=True,
                        workers=8)
X = [] # List of all training examples Y = [] # List all corresponding labels for i in range(0, len(new_text) - maxlen, step): X.append(new_text[i:i + maxlen]) Y.append(new_text[i + maxlen]) # Parameters of the generator params = { 'batch_size': 12, 'shuffle': True, 'word_indices': word_indices, 'indices_word': indices_word, 'maxlen': maxlen } training_generator = DataGenerator(X, Y, **params) model = Sequential() model.add(LSTM(128, input_shape=(maxlen, len(words)))) model.add(Dense(len(words), activation='softmax')) optimizer = RMSprop(learning_rate=0.01) model.compile(loss='categorical_crossentropy', optimizer=optimizer) def sample(preds, temperature=1.0): # helper function to sample an index from a probability array preds = np.asarray(preds).astype('float64') preds = np.log(preds) / temperature exp_preds = np.exp(preds) preds = exp_preds / np.sum(exp_preds)
def main(opt):
    """Train and/or evaluate a CNN-based speaker-verification system.

    :param opt: options object; reads opt.train, opt.predict,
        opt.window_size, opt.batch_size, opt.max_epochs and writes
        opt.n_classes. Training runs when opt.train == '1'; prediction
        runs when opt.predict == '1'.
    """
    if opt.train == '1':
        print(
            '| Training a CNN based Speaker Verification System |'
        )
        print(
            ' ******************************************************************************************\n'
        )
        training_filename = ('training_labels.lst')
        training_list = open(training_filename, "r")
        show_names, show_labels = functions.read_file(training_list)

        # To encode target labels with value between 0 and n_classes-1
        label_encoder = LabelEncoder()
        data_labels = label_encoder.fit_transform(show_labels)
        opt.n_classes = len(np.unique(data_labels))
        print('Number of classes', len(np.unique(data_labels)))

        n_frames = opt.window_size
        n_features1 = 80
        # n_features2 = 9
        # n_channels = 1
        # NOTE(review): n_features2 and n_channels are commented out above
        # yet used below — confirm they are defined at module level.
        optm = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
        input_shape1 = (n_frames, n_features1, n_channels)
        input_shape2 = (n_frames, n_features2, n_channels)

        # Partitions: 80/20 train/validation split of the show list.
        train_names, val_names = train_test_split(show_names, test_size=0.20, random_state=4)
        partition = {'train': train_names, 'validation': val_names}
        zipObj = zip(show_names, data_labels)
        labels = dict( zipObj ) # dictionary looks like this {'id10278/QOq66XogW3Q/00005': 8, ...}

        # Parameters
        params = {
            'dim1': (n_frames, n_features1),
            'dim2': (n_frames, n_features2),
            'n_frames': n_frames,
            'batch_size': opt.batch_size,
            'n_classes': opt.n_classes,
            'n_channels': n_channels,
            'shuffle': True,
            'suffixes': ['.mel2', '.xls3']
        }
        print('DataGenerator Params', params)

        # Generators
        training_generator = DataGenerator(partition['train'], labels, **params)
        validation_generator = DataGenerator(partition['validation'], labels, **params)

        # comment out below if loading an existing model instead...
        # model = functions.cnn(opt, 3, n_filters=[128,256,256], input_shape=input_shape1)
        model_name = 'cnn_pulse.h5'
        # NOTE(review): continue_from_last_trained_model is not defined in
        # this chunk — presumably a module-level flag; confirm.
        if (continue_from_last_trained_model):
            print('Continuing from a saved model...')
            model = load_model(model_name)
            # model = load_model(model_name, custom_objects=SeqWeightedAttention.get_custom_objects())
            #model.compile(optimizer=optm, loss='categorical_crossentropy', metrics = ['accuracy'])
            #model.set_weights(last_model.get_weights())
        else:
            model = functions.cnn_concat(opt, 3, n_filters=[128, 256, 256], input_shape1=input_shape1, input_shape2=input_shape2)
            # model = functions.cnn(opt, 3, n_filters=[128,256,256], input_shape=input_shape2)
            model.compile(optimizer=optm, loss='categorical_crossentropy', metrics=['accuracy'])
        model.summary()

        # Checkpoint only the best model by validation accuracy.
        checkpoint = ModelCheckpoint(model_name, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]
        model.fit_generator(
            generator=training_generator,
            epochs=opt.max_epochs,
            validation_data=validation_generator,
            verbose=1,
            shuffle=True,
            #workers=2,
            #use_multiprocessing=True,
            callbacks=callbacks_list)
        print('.... Saving model \n')
        # model.save(opt.save_dir + model_name, overwrite=True)

    if opt.predict == '1':
        print(' -------------------------------------------------')
        print(
            '| Prediciting using trained CNN based Speaker Verification Model |'
        )
        print(
            '******************************************************************************************************\n'
        )
        validation_trials = 'VoxCeleb-1_validation_trials.txt'
        validation_list = open(validation_trials, "r")
        validation_names = functions.read_trials(validation_list)
        #print(validation_names)
        #exit(1)
        model_name = 'cnn_conv1d3-b3-k11.h5'
        model = load_model('./models/triton/07-09/' + model_name)
        # model = load_model(opt.save_dir + model_name)
        # model = load_model(opt.save_dir + model_name, custom_objects=SeqWeightedAttention.get_custom_objects())
        model.summary()
        print('Model %s loaded' % model_name)
        # Scores are written next to the model name (extension stripped).
        score_file = './scores/' + model_name[:-3]
        functions.predict_by_model(opt, model, validation_names, score_file, 'Embedding') # concatenate_1 _mel
        print('.... Done prediction with model : %s' % model_name)
# To be run as a notebook # Pipeline for training the model from model import * import tensorflow as tf from my_classes import DataGenerator import os from ds_utils.data_loader import * #Data Generation data = get_data() dataset = DataGenerator(data[:, 0], data[:, 1]) """ Data Augmentation """ # Building the model input_shape = (32, 32, 32) def main(): model = DSModel(input_shape) model.build() model.compile() model.summary() input() # Training with sortagrad hist1 = model.fit(dataset, epochs = 1)
# Parse the dev protocol lines into utterance IDs and binary labels.
# NOTE(review): devProtocols, dev_IDs, dev_labels, labels, train_IDs and the
# path_to_* variables are defined outside this chunk — confirm upstream.
for idx, i in enumerate(devProtocols):
    dev_IDs[idx] = i.split()[0]
    label = i.split()[1]
    # spoof=0 , genuine=1
    dev_labels[idx] = 0 if label == 'spoof' else 1
    labels[dev_IDs[idx]] = dev_labels[idx]

params = {'batch_size': 32, 'n_classes': 2, 'shuffle': True}
# Validation uses only the first and last 150 dev utterances.
partition = {
    'train': train_IDs,
    'validation': dev_IDs[:150] + dev_IDs[len(dev_IDs) - 150:]
}
training_generator = DataGenerator(partition['train'], labels, **params,
                                   path_to_dir=path_to_dir + path_to_trainDataset)
validation_generator = DataGenerator(partition['validation'], labels, **params,
                                     path_to_dir=path_to_dir + path_to_devDataset)

############################################################################################
# Spectrogram input: 257 frequency bins x 400 frames x 1 channel.
input_shape = (257, 400, 1)
num_classes = 2

# NOTE(review): definition truncated — the chunk ends mid-signature.
def conv2d_bn(x,
import numpy as np
from keras.models import Sequential
from my_classes import DataGenerator

# Template script: train a Keras model from a multiprocessing DataGenerator.

# Parameters forwarded to DataGenerator.
params = {'dim': (32,32,32),
          'batch_size': 64,
          'n_classes': 6,
          'n_channels': 1,
          'shuffle': True}

# Datasets — fill these in for your own data.
# BUG FIX: the original placeholder lines (`partition = # IDs`,
# `labels = # Labels`) were not valid Python; give them empty-but-valid
# defaults.
partition = {'train': [], 'validation': []}  # IDs, e.g. {'train': ['id-1', 'id-2'], 'validation': ['id-4']}
labels = {}  # Labels keyed by ID, e.g. {'id-1': 0, 'id-2': 1, 'id-4': 1}

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

# Design model
model = Sequential()
[...]  # Architecture: add layers here
model.compile()  # supply an optimizer and loss before training

# Train model on dataset
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6)
# NOTE(review): this chunk opens mid `with` block — the first pickle.dump
# writes `partition` through a file handle opened outside the visible region.
pickle.dump(partition, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('labels.pkl', 'wb') as handle:
    pickle.dump(labels, handle, protocol=pickle.HIGHEST_PROTOCOL)
print('loaded labels, saved files')

# Parameters for the image data generator (299x299 RGB, binary classes).
params = {
    'dim': (299, 299),
    'batch_size': 16,
    'n_classes': 2,
    'n_channels': 3,
    'shuffle': False
}
print('initializing data generator')
finding = 7  # target finding/class index passed to the generator — confirm semantics in DataGenerator
train_gen = DataGenerator(partition['train'], labels, finding, **params)
val_gen = DataGenerator(partition['validation'], labels, finding, **params)
print('making model')
"""
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(299, 299, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
"""