def run_model(Xlist_train, ylist_train, Xlist_val, ylist_val):
    # Training now streams batches through DataGenerator instead of calling
    # model.fit() on in-memory X/y arrays as in the earlier version.

    params = {'dim': (npix, npix),
              'batch_size': batch_size,
              'n_nu': n_nu,
              'n_stokes': n_stokes,
              'feature_scale': feature_scale}

    train_datagen = DataGenerator(Xlist_train, ylist_train, **params)
    val_datagen = DataGenerator(Xlist_val, ylist_val, **params)

    hist = model.fit_generator(generator=train_datagen,
                               validation_data=val_datagen,
                               # validation_steps=int(np.floor(len(Xlist_val) / batch_size)),
                               epochs=epochs,
                               # steps_per_epoch=int(np.floor(len(Xlist_train) / batch_size)),
                               callbacks=callbacks_list,
                               use_multiprocessing=True,
                               workers=10)

    model_name = 'final_model.h5'
    model.save(model_name)

    return hist
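
# Note: the DataGenerator used above is defined elsewhere. For fit_generator it
# only needs to behave like a keras.utils.Sequence: __len__ gives the number of
# batches per epoch and __getitem__ returns one (X, y) batch. A commented sketch
# of that contract (the per-sample np.load and the feature scaling below are
# assumptions about the data layout, not part of the original code):
#
#     class DataGenerator(keras.utils.Sequence):
#         def __init__(self, X_list, y_list, dim, batch_size, n_nu, n_stokes, feature_scale):
#             self.X_list, self.y_list = X_list, y_list
#             self.batch_size, self.feature_scale = batch_size, feature_scale
#
#         def __len__(self):
#             return int(np.floor(len(self.X_list) / self.batch_size))
#
#         def __getitem__(self, index):
#             ids = range(index * self.batch_size, (index + 1) * self.batch_size)
#             X = np.stack([np.load(self.X_list[i]) * self.feature_scale for i in ids])
#             y = np.stack([np.load(self.y_list[i]) for i in ids])
#             return X, y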
    def __init__(self,
                 optimizer_index,
                 activation_index,
                 DNN_layers,
                 DNN_neurons,
                 dropout,
                 batch_size,
                 epochs,
                 n_labels=3):

        # Currently using 'relu'; 'selu' is kept as an alternative (carried over
        # from Thea's version) and may be swapped out after further testing.
        self.activation = ['relu', 'selu']
        # Currently using 'adam'; the other optimizers are also carried over from
        # Thea's version and the list may change after further testing.
        self.optimizer = ['adam', 'nadam', 'adadelta']
        self.optimizer_index = optimizer_index
        self.activation_index = activation_index
        self.DNN_layers = DNN_layers
        self.DNN_neurons = DNN_neurons
        self.dropout = dropout
        self.batch_size = batch_size
        self.epochs = epochs
        # Generators: each yields (X, y) batches, where X holds the features and
        # y the labels for the corresponding (training or validation) partition.
        training_generator = DataGenerator(partition['train'], **params)
        validation_generator = DataGenerator(partition['validation'], **params)

        self.generator = training_generator
        self.validation_data = validation_generator

        # Infer the label count and input shape from the generator rather than
        # hard-coding them (expected to be 3 and (8,) respectively).
        self.n_labels = training_generator.num_of_labels
        self.input_shape = (training_generator.dim, )

        self.__model = self.build()
    def __init__(self, optimizer_index, activation_index, DNN_layers, DNN_neurons,
                 dropout, batch_size, epochs, batch_norm_index, n_labels):
        # Note: the batch_norm_index argument may still need rethinking.

        # Currently using 'relu' and 'adam'; the alternative activations and
        # optimizers are carried over from Thea's version and may change after
        # further testing.
        self.activation = ['relu', 'selu']
        self.optimizer = ['adam', 'nadam', 'adadelta']
        self.optimizer_index = optimizer_index
        self.activation_index = activation_index
        self.DNN_layers = DNN_layers
        self.DNN_neurons = DNN_neurons
        self.dropout = dropout
        self.batch_size = batch_size
        self.epochs = epochs
        self.batch_norm_index = batch_norm_index
        self.n_labels = n_labels

        # Generators: each yields (X, y) batches of features and labels for the
        # training and validation partitions respectively.
        training_generator = DataGenerator(partition['train'], **params)
        validation_generator = DataGenerator(partition['validation'], **params)

        # TODO: set the model input shape once the feature/label handling above
        # is confirmed to be correct.

        self.__model = self.build()
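
    # A minimal sketch of the build() method referenced by the two __init__
    # variants above (it is not included in these snippets). The fully connected
    # architecture and the mean-squared-error loss are illustrative assumptions,
    # and batch_norm_index is ignored here.
    def build(self):
        from keras.models import Sequential
        from keras.layers import Dense, Dropout

        activation = self.activation[self.activation_index]
        model = Sequential()
        model.add(Dense(self.DNN_neurons, activation=activation,
                        input_shape=self.input_shape))
        model.add(Dropout(self.dropout))
        for _ in range(self.DNN_layers - 1):
            model.add(Dense(self.DNN_neurons, activation=activation))
            model.add(Dropout(self.dropout))
        model.add(Dense(self.n_labels))
        model.compile(optimizer=self.optimizer[self.optimizer_index],
                      loss='mean_squared_error')
        return model
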
Example #4
def model(image=False, audio=False, text=False):
    """
    Train all 3 models
    :param image: Whether or not to train the image model on this run
    :param audio: Whether or not to train the audio model on this run
    :param text: Whether or not to train the text model on this run
    :return:
    """

    if image:

        # Parameters
        params = {
            'dim': (10, 224, 224),
            'batch_size': 16,
            'n_channels': 3,
            'shuffle': True
        }

        # Load labels set
        with open('../data/image_data/pickle_files/y_5d_training.pkl',
                  'rb') as file:
            training_labels = pickle.load(file)
        with open('../data/image_data/pickle_files/y_5d_test.pkl',
                  'rb') as file:
            test_labels = pickle.load(file)

        # Generators
        training_generator = DataGenerator(partition='training',
                                           list_IDs=range(6000),
                                           labels=training_labels,
                                           **params)
        validation_generator = DataGenerator(partition='test',
                                             list_IDs=range(2000),
                                             labels=test_labels,
                                             **params)

        # Create model
        model = models.image_lrcn()

        # Train model on data set
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            use_multiprocessing=True,
                            workers=6,
                            epochs=5)

        model.save_weights('../output/image_model.h5')

    if audio:

        # Read in audio data
        training_set = pd.read_csv(
            '../data/audio_data/pickle_files/training_df.csv')
        test_set = pd.read_csv('../data/audio_data/pickle_files/test_df.csv')

        # Concat data sets in order to use all data for CV
        all_data = pd.concat((training_set, test_set), axis=0)
        X_all = all_data.drop(['interview_score', 'video_id'], axis=1)
        y_all = all_data['interview_score']

        logging.info('Start training audio model')

        # Create model and fit to data
        audio_model = models.audio_rand_forest()
        audio_model.fit(X_all, y_all)

        logging.info(audio_model.best_params_)
        logging.info('Train score with best estimator: {}'.format(
            max(audio_model.cv_results_['mean_train_score'])))
        logging.info('Test score with best estimator: {}'.format(
            max(audio_model.cv_results_['mean_test_score'])))

        # Save to disk
        with open('../output/audio_model.pkl', 'wb') as fid:
            pickle.dump(audio_model, fid)

    if text:

        # Load in word embeddings
        embedding_matrix, word_to_index = resources.create_embedding_matrix()

        # Load text data
        with open('../data/text_data/pickle_files/X_training.pkl',
                  'rb') as file:
            X_train = pickle.load(file)
        with open('../data/text_data/pickle_files/y_training.pkl',
                  'rb') as file:
            y_train = pickle.load(file)
        with open('../data/text_data/pickle_files/X_test.pkl', 'rb') as file:
            X_test = pickle.load(file)
        with open('../data/text_data/pickle_files/y_test.pkl', 'rb') as file:
            y_test = pickle.load(file)

        # Create model object and fit
        text_model = models.text_lstm_model(embedding_matrix=embedding_matrix)
        filename = '../output/text_model.h5'
        checkpoint = ModelCheckpoint(filename,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        text_model.fit(X_train,
                       y_train,
                       batch_size=32,
                       epochs=55,
                       validation_data=(X_test, y_test),
                       callbacks=[checkpoint],
                       shuffle=True)

    pass
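
# Illustrative usage: train all three modality models in one run.
if __name__ == '__main__':
    model(image=True, audio=True, text=True)
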
Example #5
def score_new_vid():

    logging.info('Begin extraction for scoring partition')

    # Extract features from vids
    lib.extract_images(partition='score', num_frames=10)
    lib.extract_audio(partition='score')
    lib.extract_text(partition='score', training=False)

    logging.info('Begin transformation for scoring partition')

    # Transform features
    embedding_matrix, word_to_index = resources.create_embedding_matrix()
    lib.transform_images(partition='score', num_frames=10, training=False)
    lib.transform_audio(partition='score', n_mfcc=13, training=False)
    lib.transform_text(partition='score',
                       word_to_index=word_to_index,
                       training=False)

    logging.info('Load models for evaluation of the scoring partition')

    # Load models
    image_model = models.image_lrcn()
    image_model.load_weights('../output/image_model.h5')
    audio_model = pickle.load(open('../output/audio_model.pkl', 'rb'))
    text_model = load_model('../output/text_model.h5')
    ensemble_model = pickle.load(open('../output/ensemble_model.pkl', 'rb'))

    logging.info('Load transformed data')

    # Load image data
    with open('../data/image_data/pickle_files/vid_ids_5d_score.pkl',
              'rb') as file:
        id_img_score = pickle.load(file)

    # Load audio data
    aud_to_score = pd.read_csv('../data/audio_data/pickle_files/score_df.csv')
    X_aud_score = aud_to_score.drop(['video_id'], axis=1)
    id_aud_score = aud_to_score['video_id']

    # Load text data
    with open('../data/text_data/pickle_files/X_score.pkl', 'rb') as file:
        X_text_score = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_score.pkl',
              'rb') as file:
        id_text_score = pickle.load(file)

    # Load generator
    score_generator = DataGenerator(
        partition='training',
        list_IDs=range(len(id_aud_score)),
        labels=[0 for i in range(len(id_aud_score))],
        batch_size=len(id_aud_score),
        n_channels=3,
        dim=(10, 224, 224),
        shuffle=False)

    logging.info('Predict values with image, text and audio models')

    # Predict values
    img_score_df = pd.DataFrame({
        'img_preds':
        [i[0] for i in image_model.predict_generator(score_generator)],
        'video_ids':
        id_img_score
    })
    aud_score_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_score),
        'video_ids': id_aud_score
    })
    text_score_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_score)],
        'video_ids':
        id_text_score
    })

    logging.info('Make final predictions')

    # Merge predictions
    score_preds = img_score_df.merge(aud_score_df, on='video_ids')
    score_preds = score_preds.merge(text_score_df, on='video_ids')

    # Make final prediction
    X_score = score_preds[['img_preds', 'aud_preds', 'text_preds']]
    score_preds['final_prediction'] = ensemble_model.predict(X_score)

    # Save predictions to disk
    score_preds.to_csv('../output/predictions.csv', index=False)

    pass
Example #6
def ensemble():

    logging.info('Begin Ensemble model building, loading models')

    # Load models
    image_model = models.image_lrcn()
    image_model.load_weights('../output/image_model.h5')
    audio_model = pickle.load(open('../output/audio_model.pkl', 'rb'))
    text_model = load_model('../output/text_model.h5')

    # Load labels set
    with open('../data/image_data/pickle_files/y_5d_training.pkl',
              'rb') as file:
        training_labels = pickle.load(file)
    with open('../data/image_data/pickle_files/y_5d_test.pkl', 'rb') as file:
        test_labels = pickle.load(file)

    # Load generators
    training_generator = DataGenerator(partition='training',
                                       list_IDs=range(6000),
                                       labels=training_labels,
                                       batch_size=16,
                                       n_channels=3,
                                       dim=(10, 224, 224),
                                       shuffle=False)
    validation_generator = DataGenerator(partition='test',
                                         list_IDs=range(2000),
                                         labels=test_labels,
                                         batch_size=16,
                                         n_channels=3,
                                         dim=(10, 224, 224),
                                         shuffle=False)
    holdout_generator = DataGenerator(partition='validation',
                                      list_IDs=range(2000),
                                      labels=test_labels,
                                      batch_size=16,
                                      n_channels=3,
                                      dim=(10, 224, 224),
                                      shuffle=False)

    logging.info('Load data files')

    # Load image data
    with open('../data/image_data/pickle_files/y_training.pkl', 'rb') as file:
        y_img_train = pickle.load(file)
    with open('../data/image_data/pickle_files/y_test.pkl', 'rb') as file:
        y_img_test = pickle.load(file)
    with open('../data/image_data/pickle_files/y_validation.pkl',
              'rb') as file:
        y_img_val = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_training.pkl',
              'rb') as file:
        id_img_train = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_test.pkl',
              'rb') as file:
        id_img_test = pickle.load(file)
    with open('../data/image_data/pickle_files/vid_ids_validation.pkl',
              'rb') as file:
        id_img_val = pickle.load(file)

    # Load audio data
    aud_train = pd.read_csv('../data/audio_data/pickle_files/training_df.csv')
    aud_test = pd.read_csv('../data/audio_data/pickle_files/test_df.csv')
    aud_val = pd.read_csv('../data/audio_data/pickle_files/validation_df.csv')
    X_aud_train = aud_train.drop(['interview_score', 'video_id'], axis=1)
    id_aud_train = aud_train['video_id']
    X_aud_test = aud_test.drop(['interview_score', 'video_id'], axis=1)
    id_aud_test = aud_test['video_id']
    X_aud_val = aud_val.drop(['interview_score', 'video_id'], axis=1)
    id_aud_val = aud_val['video_id']

    # Load text data
    with open('../data/text_data/pickle_files/X_training.pkl', 'rb') as file:
        X_text_train = pickle.load(file)
    with open('../data/text_data/pickle_files/X_test.pkl', 'rb') as file:
        X_text_test = pickle.load(file)
    with open('../data/text_data/pickle_files/X_validation.pkl', 'rb') as file:
        X_text_val = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_training.pkl',
              'rb') as file:
        id_text_train = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_test.pkl', 'rb') as file:
        id_text_test = pickle.load(file)
    with open('../data/text_data/pickle_files/vid_ids_validation.pkl',
              'rb') as file:
        id_text_val = pickle.load(file)

    logging.info('Getting predictions for all 3 models')

    # Get predictions
    img_train_df = pd.DataFrame({
        'img_preds':
        [i[0] for i in image_model.predict_generator(training_generator)],
        'video_ids':
        id_img_train,
        'interview_score':
        y_img_train
    })
    img_test_df = pd.DataFrame({
        'img_preds':
        [i[0] for i in image_model.predict_generator(validation_generator)],
        'video_ids':
        id_img_test,
        'interview_score':
        y_img_test
    })
    img_val_df = pd.DataFrame({
        'img_preds':
        [i[0] for i in image_model.predict_generator(holdout_generator)],
        'video_ids':
        id_img_val,
        'interview_score':
        y_img_val
    })
    aud_train_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_train),
        'video_ids': id_aud_train
    })
    aud_test_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_test),
        'video_ids': id_aud_test
    })
    aud_val_df = pd.DataFrame({
        'aud_preds': audio_model.predict(X_aud_val),
        'video_ids': id_aud_val
    })
    text_train_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_train)],
        'video_ids':
        id_text_train
    })
    text_test_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_test)],
        'video_ids':
        id_text_test
    })
    text_val_df = pd.DataFrame({
        'text_preds': [i[0] for i in text_model.predict(X_text_val)],
        'video_ids':
        id_text_val
    })

    logging.info('Merge predictions together into single data frame')

    # Merge predictions
    train_preds = img_train_df.merge(aud_train_df, on='video_ids')
    train_preds = train_preds.merge(text_train_df, on='video_ids')
    test_preds = img_test_df.merge(aud_test_df, on='video_ids')
    test_preds = test_preds.merge(text_test_df, on='video_ids')
    val_preds = img_val_df.merge(aud_val_df, on='video_ids')
    val_preds = val_preds.merge(text_val_df, on='video_ids')

    # Score models
    img_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'],
                           train_preds['img_preds']))
    img_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'],
                           test_preds['img_preds']))
    img_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'],
                           val_preds['img_preds']))
    aud_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'],
                           train_preds['aud_preds']))
    aud_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'],
                           test_preds['aud_preds']))
    aud_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'],
                           val_preds['aud_preds']))
    text_train_score = np.sqrt(
        mean_squared_error(train_preds['interview_score'],
                           train_preds['text_preds']))
    text_test_score = np.sqrt(
        mean_squared_error(test_preds['interview_score'],
                           test_preds['text_preds']))
    text_val_score = np.sqrt(
        mean_squared_error(val_preds['interview_score'],
                           val_preds['text_preds']))

    # Print scores to screen
    logging.info('Image score on the training set: {}'.format(img_train_score))
    logging.info('Image score on the test set: {}'.format(img_test_score))
    logging.info('Image score on the val set: {}'.format(img_val_score))
    logging.info('Audio score on the training set: {}'.format(aud_train_score))
    logging.info('Audio score on the test set: {}'.format(aud_test_score))
    logging.info('Audio score on the val set: {}'.format(aud_val_score))
    logging.info('Text score on the training set: {}'.format(text_train_score))
    logging.info('Text score on the test set: {}'.format(text_test_score))
    logging.info('Text score on the val set: {}'.format(text_val_score))

    # Split target variable and features
    X_train = train_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_train = train_preds[['interview_score']]
    X_test = test_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_test = test_preds[['interview_score']]
    X_val = val_preds[['img_preds', 'aud_preds', 'text_preds']]
    y_val = val_preds[['interview_score']]

    logging.info('Build OLS model to combine model outputs')

    # Build OLS model
    ols_model = LinearRegression()
    ols_model.fit(X_train, y_train)

    # Score model
    train_score = np.sqrt(
        mean_squared_error(y_train, ols_model.predict(X_train)))
    test_score = np.sqrt(mean_squared_error(y_test, ols_model.predict(X_test)))
    val_score = np.sqrt(mean_squared_error(y_val, ols_model.predict(X_val)))

    logging.info('OLS Score on training set: {}'.format(train_score))
    logging.info('OLS Score on test set: {}'.format(test_score))
    logging.info('OLS Score on val set: {}'.format(val_score))

    # Save model
    with open('../output/ensemble_model.pkl', 'wb') as fid:
        pickle.dump(ols_model, fid)

    logging.info('Ensemble model saved')

    return
from keras.models import Sequential
from my_classes import DataGenerator

# Parameters
params = {'dim_x': 32,
          'dim_y': 32,
          'dim_z': 32,
          'batch_size': 32,
          'shuffle': True}

# Datasets
partition = {'train': [...], 'validation': [...]}  # IDs
labels = {...}  # Labels, keyed by ID

# Generators
training_generator = DataGenerator(**params).generate(labels, partition['train'])
validation_generator = DataGenerator(**params).generate(labels, partition['validation'])

# Design model
model = Sequential()
[...] # Architecture
model.compile()

# Train model on dataset
model.fit_generator(generator = training_generator,
                    steps_per_epoch = len(partition['train'])//params['batch_size'],
                    validation_data = validation_generator,
                    validation_steps = len(partition['validation'])//params['batch_size'])
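
# The DataGenerator assumed above (imported from my_classes) is not shown. A
# minimal sketch of the generate() interface it implies -- an infinite generator
# that yields one (X, y) batch at a time -- where the 'data/<ID>.npy' file layout
# and integer labels are assumptions (labels may need one-hot encoding depending
# on the model's loss):
import numpy as np


class DataGenerator(object):

    def __init__(self, dim_x=32, dim_y=32, dim_z=32, batch_size=32, shuffle=True):
        self.dim_x, self.dim_y, self.dim_z = dim_x, dim_y, dim_z
        self.batch_size = batch_size
        self.shuffle = shuffle

    def generate(self, labels, list_IDs):
        # loop forever: fit_generator draws steps_per_epoch batches per epoch
        while True:
            order = np.random.permutation(len(list_IDs)) if self.shuffle \
                else np.arange(len(list_IDs))
            for b in range(len(list_IDs) // self.batch_size):
                batch_ids = [list_IDs[i]
                             for i in order[b * self.batch_size:(b + 1) * self.batch_size]]
                X = np.empty((self.batch_size, self.dim_x, self.dim_y, self.dim_z))
                y = np.empty((self.batch_size,), dtype=int)
                for j, ID in enumerate(batch_ids):
                    X[j] = np.load('data/' + ID + '.npy')
                    y[j] = labels[ID]
                yield X, y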


Example #8
with open('/data/data1/users/konpyro/text_feats/ids.pkl', 'rb') as file:
    text_ids = pkl.load(file)

# Parameters
params = {
    'dim': (60, 431),
    'batch_size': 16,
    'n_classes': 4,
    'n_channels': 6,
    'shuffle': False
}

epochs = 10

# Generators
generator = DataGenerator(ids, labels, **params)

# Late fuse model
model_A1 = load_model('/data/data1/users/konpyro/model_A1.h5')
#model_T2 = tf.keras.models.load_model('/data/data1/users/konpyro/model_T2.h5', custom_objects={'BertLayer': bertlayer.BertLayer})
# load json and create model
json_file = open('/data/data1/users/konpyro/model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_T2 = model_from_json(loaded_model_json,
                           custom_objects={'BertLayer': bertlayer.BertLayer})
# load weights into new model
model_T2.load_weights("/data/data1/users/konpyro/model_weights.h5")
print("Loaded model from disk")

input_shape = (8, )
def cnn_model(pickle_file_path):
    print("Reached cnn model")
    params = {'dim': (15250, 320, 5),
            'batch_size': 10,
            'n_classes': 2,
            'n_channels': 1,
            'shuffle': True} 
    RFI_files = glob.glob(pickle_file_path + '*rfi_*.pkl')
    Furby_files = glob.glob(pickle_file_path + '*fur*.pkl')
    train = []
    validation = []

    labels = []
    train = RFI_files[:-483]
    print(np.shape(train))
    train = np.concatenate((train, Furby_files[:-483]), axis = 0)
    print(np.shape(train))
    validation = RFI_files[-483:]
    print(np.shape(validation))
    validation = np.concatenate((validation, Furby_files[-483:]), axis = 0)
    print(np.shape(validation))
    labels_RFI = np.zeros(np.shape(RFI_files))
    labels_fur = np.ones(np.shape(Furby_files))
    labels = np.concatenate((labels_RFI, labels_fur), axis = 0)
#    print(np.concatenate((RFI_files, Furby_files), axis = 0)[0])


    partition = {'train': train, 'validation': validation}
    label = dict(zip(np.concatenate((RFI_files, Furby_files), axis = 0), labels))
#    print(label)
    print(np.shape(label))
    print(np.shape(partition['train']))
    print(np.shape(partition['validation']))

    # Generators
    training_generator = DataGenerator(partition['train'], label, **params)
    validation_generator = DataGenerator(partition['validation'], label, **params)
 
#    le = LabelEncoder()
#    yy = to_categorical(le.fit_transform(y))

#    X_train, X_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)

#    print("split the data into training and test sets")

 #   def correct_dim_cnn(array):
 #       output=[]
 #       for i in array:
 #           print(np.shape(i))
 #           output.append(i.reshape(15250, 320, 5))
 #       return output

 #   X_train = correct_dim_cnn(X_train)
 #   X_test = correct_dim_cnn(X_test)

 #   print(np.shape(X_train))
 #   print(np.shape(X_test))
 #   print(y_test)
 #   print(y_train)



    def create_cnn(height, width, depth, filters=(250, 32, 5), regress=False):
        HWD = (height, width, depth)
        print(HWD)
        Dim = -1
        inputs = Input(shape = HWD)
        for (i, f) in enumerate(filters):
            if i == 0:
                x = inputs
            x = Conv2D(f, (3, 3), padding="valid")(x)
            x = Activation("relu")(x)
            x = MaxPooling2D(pool_size=(2, 2))(x)
            x = BatchNormalization(axis=Dim)(x)
            x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=Dim)(x)
        x = Dropout(0.5)(x)
        x = Dense(4)(x)
        x = Activation("relu")(x)
        model = Model(inputs, x)
        return model

    cnn_model = create_cnn(15250, 320, 5, regress=False)
    cnn_model.summary()
    num_labels = 2
    x = Dense(4, activation="relu")(cnn_model.output)
    x = Dense(num_labels, activation="softmax")(x)
    model = Model(inputs=[cnn_model.input], outputs=x)
    run_opts = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom = True)


    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001))
    model.summary()
    num_epochs = 100
    num_batch_size = 10
    checkpointer = ModelCheckpoint(filepath= pickle_file_path +'weights.best.ABC_returns_learning_rate_0.001_batch_150_epochs_100.hdf5', verbose=1, save_best_only=True)
    start = datetime.now()

#    with tf.compat.v1.Session( config = tf.compat.v1.ConfigProto( log_device_placement = True ) ):
    model.fit_generator(generator=training_generator, validation_data=validation_generator)

#    model.fit_generator(generator=training_generator,
#            validation_data=validation_generator,
#            use_multiprocessing=True,
#            workers=6, callbacks=[checkpointer], verbose=1)
#    model.fit_generator(generator=training_generator, validation_data=validation_generator)
    
    #model.fit([X_train], y_train, validation_data = ([X_test], y_test), batch_size=num_batch_size, epochs=num_epochs, callbacks=[checkpointer], verbose=1)
    duration = datetime.now() - start
    print("Training completed in time: ", duration)
    # Evaluate on the validation generator; the model was compiled with a loss
    # only, so this returns the categorical cross-entropy loss.
    score = model.evaluate_generator(validation_generator, verbose=1)
    print("Loss on validation set: %.4f" % score)
    model_json = model.to_json()
    with open(pickle_file_path + "model_ABC_returns_learning_rate_0.001_batch_150_epochs_100.json", "w") as json_file:
        json_file.write(model_json)
def generator_main():

  #original_dataset_dir = 'C:\\svn\\dwatts\\dev\\datasets\\whale_data\\data'
  #original_train_dataset_dir = 'C:\\svn\\dwatts\\dev\\datasets\\whale_data\\data\\train'
  #base_dir = 'C:\\svn\\dwatts\\dev\\dl_with_python\\whale_small\\'

  original_dataset_dir = '/home/david/data/whale_data/data'
  original_train_dataset_dir = '/home/david/data/whale_data/data/train'
  base_dir = '/home/david/dev/deepLearningForPython/whale_small/'

  # spectrogram parameters
  params = {'batch_size': 64,
            'dim': (48,126),
            'n_channels': 1,
            'n_classes': 2,
            'shuffle': True,
            'NFFT':64,
            'Fs':2000,
            'noverlap':32}

  # Number of time slice metrics
  maxTime = 126

  Ntrain = 5000
  Nval = 500
  Ntest = 500

  train_loc = 'whale_small/train'
  val_loc = 'whale_small/validation'
  # load data, parition and labels:
  # e.g. {'train': ['id-1', 'id-2', 'id-3'], 'validation': ['id-4']}
  # e.g. {'id-1': 0, 'id-2': 1, 'id-3': 2, 'id-4': 1}
  #train = fileio.TrainData(original_dataset_dir+'\\train.csv',base_dir+'train')
  partition, labels = create_dirs(Ntrain, Nval, Ntest, original_dataset_dir, base_dir)

  # Generators
  training_generator = DataGenerator(partition['train'], labels, train_loc, **params)
  validation_generator = DataGenerator(partition['validation'], labels, val_loc, **params)

  for data_batch, labels_batch in training_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break

  # Determine proper input shape
  input_shape = (params['dim'][0], params['dim'][1], params['n_channels'])

  '''
  model = models.Sequential()
  model.add(layers.Conv2D(32, (3, 3), activation='relu',
  input_shape=input_shape))
  model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
  model.add(layers.Conv2D(64, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
  model.add(layers.Conv2D(128, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
  model.add(layers.Conv2D(128, (3, 3), activation='relu'))
  model.add(layers.MaxPooling2D((2, 2),dim_ordering="th"))
  model.add(layers.Flatten())
  model.add(layers.Dense(512, activation='relu'))
  model.add(layers.Dense(2, activation='softmax'))
  '''

  melgram_input = Input(shape=input_shape)

  # Only tf dimension ordering
  channel_axis = 3
  freq_axis = 1
  time_axis = 2

  # Input block
  x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(melgram_input)

  # Conv block 1
  x = Convolution2D(64, (3, 3), padding='same', name='conv1')(x)
  x = BatchNormalization(axis=channel_axis, name='bn1')(x)
  x = ELU()(x)
  x = MaxPooling2D(pool_size=(2, 4), name='pool1')(x)

  # Conv block 2
  x = Convolution2D(128, (3, 3), padding='same', name='conv2')(x)
  x = BatchNormalization(axis=channel_axis, name='bn2')(x)
  x = ELU()(x)
  x = MaxPooling2D(pool_size=(2, 4), name='pool2')(x)

  # Conv block 3
  x = Convolution2D(128, (3, 3), padding='same', name='conv3')(x)
  x = BatchNormalization(axis=channel_axis, name='bn3')(x)
  x = ELU()(x)
  x = MaxPooling2D(pool_size=(2, 4), name='pool3')(x)

  # Conv block 4
  x = Convolution2D(128, (3, 3), padding='same', name='conv4')(x)
  x = BatchNormalization(axis=channel_axis, name='bn4')(x)
  x = ELU()(x)
  x = MaxPooling2D(pool_size=(3, 5), name='pool4')(x)

  # Conv block 5
  x = Convolution2D(64, (3, 3), padding='same', name='conv5')(x)
  x = BatchNormalization(axis=channel_axis, name='bn5')(x)
  x = ELU()(x)
  x = MaxPooling2D(pool_size=(4, 4), name='pool5')(x)

  # Output
  x = Flatten()(x)
  x = Dense(50, activation='relu', name='hidden1')(x)
  x = Dense(2, activation='softmax', name='output')(x)

  # Create model
  model = Model(melgram_input, x)


  model.summary()

  from keras import optimizers

  # Compile the model
  model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.RMSprop(lr=1e-4),
    metrics=['acc'])

  # Train model on dataset
  history = model.fit_generator(
    training_generator,
    steps_per_epoch=100,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=50)
    #use_multiprocessing=True,
    #workers=1)

  model.save('whale_small_1.h5')

  import matplotlib.pyplot as plt
  acc = history.history['acc']
  val_acc = history.history['val_acc']
  loss = history.history['loss']
  val_loss = history.history['val_loss']
  epochs = range(1, len(acc) + 1)
  plt.plot(epochs, acc, 'bo', label='Training acc')
  plt.plot(epochs, val_acc, 'b', label='Validation acc')
  plt.title('Training and validation accuracy')
  plt.legend()
  plt.figure()
  plt.plot(epochs, loss, 'bo', label='Training loss')
  plt.plot(epochs, val_loss, 'b', label='Validation loss')
  plt.title('Training and validation loss')
  plt.legend()
  plt.show()
Example #11
    train_list += partition_dict[partition]
for partition in model_params['test_partitions']:
    test_list += partition_dict[partition]
model_params['train_len'] = len(train_list)
model_params['test_len'] = len(test_list)

# specify generator parameters
params = {'dim': (model_params['num_rows'],model_params['num_cols']),
          'batch_size': model_params['batch_size'],
          'n_classes': model_params['num_classes'],
          'n_channels': model_params['channels'],
          'shuffle': True,
          'scale_image': model_params['preprocess_input'] }

# instantiate the generators
train_generator = DataGenerator(train_list, labels_dict, model_params['augmentation'], **params)
test_generator = DataGenerator(test_list, labels_dict, False, **params)

# log details
log_model_details(model, model_params)

# for convenience
model_rev_name = model_params['model_name'] + ' ' + model_params['rev']
mode = model_params['run_mode']

if mode == 'train' or mode == 'train-eval':
    # train the model on the new data for specified epochs
    model.fit_generator(generator=train_generator,
                        epochs=model_params['training_epochs'],
                        use_multiprocessing=True,
                        workers=8)
Example #12
X = []  # List of all training examples
Y = []  # List all corresponding labels
for i in range(0, len(new_text) - maxlen, step):
    X.append(new_text[i:i + maxlen])
    Y.append(new_text[i + maxlen])

# Parameters of the generator
params = {
    'batch_size': 12,
    'shuffle': True,
    'word_indices': word_indices,
    'indices_word': indices_word,
    'maxlen': maxlen
}

training_generator = DataGenerator(X, Y, **params)

model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(words))))
model.add(Dense(len(words), activation='softmax'))

optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    # draw a single sample from the re-weighted distribution and return its index
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
Example #13
def main(opt):

    if opt.train == '1':
        print(
            '|             Training a CNN based Speaker Verification System                           |'
        )
        print(
            ' ******************************************************************************************\n'
        )

        training_filename = ('training_labels.lst')
        training_list = open(training_filename, "r")
        show_names, show_labels = functions.read_file(training_list)

        # To encode target labels with value between 0 and n_classes-1
        label_encoder = LabelEncoder()
        data_labels = label_encoder.fit_transform(show_labels)
        opt.n_classes = len(np.unique(data_labels))
        print('Number of classes', len(np.unique(data_labels)))

        n_frames = opt.window_size
        n_features1 = 80  #
        n_features2 = 9  #
        n_channels = 1
        optm = optimizers.Adam(lr=0.0001,
                               beta_1=0.9,
                               beta_2=0.999,
                               epsilon=None,
                               decay=0.0,
                               amsgrad=False)

        input_shape1 = (n_frames, n_features1, n_channels)
        input_shape2 = (n_frames, n_features2, n_channels)

        # Partitions
        train_names, val_names = train_test_split(show_names,
                                                  test_size=0.20,
                                                  random_state=4)
        partition = {'train': train_names, 'validation': val_names}

        zipObj = zip(show_names, data_labels)
        labels = dict(
            zipObj
        )  # dictionary looks like this {'id10278/QOq66XogW3Q/00005': 8, ...}

        # Parameters
        params = {
            'dim1': (n_frames, n_features1),
            'dim2': (n_frames, n_features2),
            'n_frames': n_frames,
            'batch_size': opt.batch_size,
            'n_classes': opt.n_classes,
            'n_channels': n_channels,
            'shuffle': True,
            'suffixes': ['.mel2', '.xls3']
        }
        print('DataGenerator Params', params)

        # Generators
        training_generator = DataGenerator(partition['train'], labels,
                                           **params)
        validation_generator = DataGenerator(partition['validation'], labels,
                                             **params)

        # comment out below if loading an existing model instead...
        #  model = functions.cnn(opt, 3, n_filters=[128,256,256], input_shape=input_shape1)

        model_name = 'cnn_pulse.h5'
        if (continue_from_last_trained_model):
            print('Continuing from a saved model...')
            model = load_model(model_name)
            #  model = load_model(model_name, custom_objects=SeqWeightedAttention.get_custom_objects())
            #model.compile(optimizer=optm, loss='categorical_crossentropy', metrics = ['accuracy'])
            #model.set_weights(last_model.get_weights())
        else:
            model = functions.cnn_concat(opt,
                                         3,
                                         n_filters=[128, 256, 256],
                                         input_shape1=input_shape1,
                                         input_shape2=input_shape2)
            #  model = functions.cnn(opt, 3, n_filters=[128,256,256], input_shape=input_shape2)
            model.compile(optimizer=optm,
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

        model.summary()

        checkpoint = ModelCheckpoint(model_name,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        callbacks_list = [checkpoint]

        model.fit_generator(
            generator=training_generator,
            epochs=opt.max_epochs,
            validation_data=validation_generator,
            verbose=1,
            shuffle=True,
            #workers=2,
            #use_multiprocessing=True,
            callbacks=callbacks_list)
        print('.... Saving model \n')
    #  model.save(opt.save_dir + model_name, overwrite=True)

    if opt.predict == '1':
        print(' -------------------------------------------------')
        print(
            '|          Predicting using trained CNN based Speaker Verification Model                            |'
        )
        print(
            '******************************************************************************************************\n'
        )

        validation_trials = 'VoxCeleb-1_validation_trials.txt'
        validation_list = open(validation_trials, "r")
        validation_names = functions.read_trials(validation_list)
        #print(validation_names)
        #exit(1)

        model_name = 'cnn_conv1d3-b3-k11.h5'
        model = load_model('./models/triton/07-09/' + model_name)
        #  model = load_model(opt.save_dir + model_name)
        #  model = load_model(opt.save_dir + model_name, custom_objects=SeqWeightedAttention.get_custom_objects())
        model.summary()
        print('Model %s loaded' % model_name)

        score_file = './scores/' + model_name[:-3]
        functions.predict_by_model(opt, model, validation_names, score_file,
                                   'Embedding')  #  concatenate_1 _mel
        print('.... Done prediction with model : %s' % model_name)
Example #14
# To be run as a notebook
# Pipeline for training the model

from model import *
import tensorflow as tf 
from my_classes import DataGenerator
import os
from ds_utils.data_loader import *

#Data Generation
data = get_data()
dataset = DataGenerator(data[:, 0], data[:, 1])
"""
Data Augmentation
"""



# Building the model
input_shape = (32, 32, 32)

def main():
    model = DSModel(input_shape)
    model.build()
    model.compile()

    model.summary()
    input()

    # Training with sortagrad
    hist1 = model.fit(dataset, epochs = 1)
for idx, i in enumerate(devProtocols):
    dev_IDs[idx] = i.split()[0]
    label = i.split()[1]
    # spoof=0 , genuine=1
    dev_labels[idx] = 0 if label == 'spoof' else 1
    labels[dev_IDs[idx]] = dev_labels[idx]

params = {'batch_size': 32, 'n_classes': 2, 'shuffle': True}
partition = {
    'train': train_IDs,
    'validation': dev_IDs[:150] + dev_IDs[len(dev_IDs) - 150:]
}

training_generator = DataGenerator(partition['train'],
                                   labels,
                                   **params,
                                   path_to_dir=path_to_dir +
                                   path_to_trainDataset)
validation_generator = DataGenerator(partition['validation'],
                                     labels,
                                     **params,
                                     path_to_dir=path_to_dir +
                                     path_to_devDataset)

############################################################################################

input_shape = (257, 400, 1)
num_classes = 2


def conv2d_bn(x,
Example #16
import numpy as np

from keras.models import Sequential
from my_classes import DataGenerator

# Parameters
params = {'dim': (32,32,32),
          'batch_size': 64,
          'n_classes': 6,
          'n_channels': 1,
          'shuffle': True}

# Datasets
partition = {'train': [...], 'validation': [...]}  # IDs
labels = {...}  # Labels, keyed by ID

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

# Design model
model = Sequential()
[...] # Architecture
model.compile()

# Train model on dataset
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6)
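
# The DataGenerator imported from my_classes is not shown above. A minimal
# Sequence-based sketch consistent with the parameters used in this example;
# the 'data/<ID>.npy' file layout is an assumption, not part of the original:
import numpy as np
from keras.utils import Sequence, to_categorical


class DataGenerator(Sequence):

    def __init__(self, list_IDs, labels, dim=(32, 32, 32), batch_size=64,
                 n_classes=6, n_channels=1, shuffle=True):
        self.list_IDs = list_IDs
        self.labels = labels
        self.dim = dim
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # number of batches per epoch
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        # assemble one batch of samples and one-hot encoded labels
        idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size,), dtype=int)
        for j, k in enumerate(idxs):
            ID = self.list_IDs[k]
            X[j] = np.load('data/' + str(ID) + '.npy').reshape(*self.dim, self.n_channels)
            y[j] = self.labels[ID]
        return X, to_categorical(y, num_classes=self.n_classes)

    def on_epoch_end(self):
        # reshuffle the sample order between epochs
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)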
Example #17
        pickle.dump(partition, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('labels.pkl', 'wb') as handle:
        pickle.dump(labels, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('loaded labels, saved files')

params = {
    'dim': (299, 299),
    'batch_size': 16,
    'n_classes': 2,
    'n_channels': 3,
    'shuffle': False
}

print('initializing data generator')
finding = 7
train_gen = DataGenerator(partition['train'], labels, finding, **params)
val_gen = DataGenerator(partition['validation'], labels, finding, **params)

print('making model')
"""
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(299, 299, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
"""