def load_best_lstm_mult_label_multi_class(exp, name_model, we_file_name):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    # Embedding weights are trainable only for randomly initialised (RAND) or NON_STATIC embeddings.
    lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC)

    neuronios_by_layer = [16]
    epochs = [32]
    batch_sizes = [40]
    dropouts = [0.2]

    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_dataset_generic(
        "/home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_multi_label_test_test_528.df",
        ['control', 'anxiety', 'depression'])
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + \
                                          we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.use_embedding = dn.UseEmbedding.RAND
    exp.pp_data.embedding_type = dn.EmbeddingType.NONE

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 10
    lstm.batch_size = 32
    lstm.patience_train = 4
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    lstm.embed_trainable = True

    lstm.model = Sequential()
    lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                             trainable=lstm.embed_trainable))
    lstm.model.add(LSTM(64, activation='tanh', dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    lstm.model.add(LSTM(32, activation='tanh', dropout=0.2, recurrent_dropout=0.2))
    lstm.model.add(Dense(1, activation='sigmoid'))

    time_ini_exp = datetime.datetime.now()
    # exp.k_fold_cross_validation(lstm)
    exp.test_hypeparams(lstm)
    exp.set_period_time_end(time_ini_exp, 'Total experiment')
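# Note on the `embed_trainable` expressions used in this module: `x == (A or B)` evaluates
# `A or B` to a single enum member before comparing, so it never tests both members. The
# membership form `x in (A, B)` expresses the intended check. The helper below is
# illustrative only (the `Mode` enum is hypothetical, not part of this project) and is
# never called by the experiment code.
def _embed_trainable_note():
    from enum import Enum

    class Mode(Enum):
        RAND = 1
        STATIC = 2
        NON_STATIC = 3

    x = Mode.NON_STATIC
    # `or` short-circuits to Mode.RAND, so the equality check misses NON_STATIC.
    assert not (x == (Mode.RAND or Mode.NON_STATIC))
    # The membership test accepts either member, which matches the intended semantics.
    assert x in (Mode.RAND, Mode.NON_STATIC)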
def generate_model(exp, name_model, kernel_name, set_params, function):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    cnn_lstm = ModelClass(1)
    cnn_lstm.loss_function = 'binary_crossentropy'
    cnn_lstm.optmizer_function = 'adadelta'
    cnn_lstm.epochs = 15
    cnn_lstm.batch_size = 32
    cnn_lstm.patience_train = 10

    filters_by_layer = set_params['filters_by_layer']
    neuronios_by_lstm_layer = set_params['neuronios_by_lstm_layer']
    dropouts = set_params['dropouts']
    dropouts_lstm = set_params['dropouts_lstm']
    kernels_size = set_params['kernels_size']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()

                cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
                cnn_lstm.embed_trainable = cnn_lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND,
                                                                                dn.UseEmbedding.NON_STATIC)

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                               str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_name

                for filter in filters_by_layer:
                    for kernel_size in kernels_size:
                        for batch_size in batch_sizes:
                            for epoch in epochs:
                                for dropout in dropouts:
                                    for dropout_lstm in dropouts_lstm:
                                        for neuronios in neuronios_by_lstm_layer:
                                            cnn_lstm.epochs = epoch
                                            cnn_lstm.batch_size = batch_size
                                            cnn_lstm.patience_train = epoch // 2
                                            exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
                                                                  str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
                                                                  '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + str(filter) + \
                                                                  '_LSTM_N' + str(neuronios) + '_D' + str(dropout_lstm) + '_' + we_file_name

                                            cnn_lstm.model = Sequential()
                                            cnn_lstm.model.add(Embedding(exp.pp_data.vocabulary_size,
                                                                         exp.pp_data.embedding_size,
                                                                         trainable=cnn_lstm.embed_trainable,
                                                                         name='emb_' + name_model))
                                            cnn_lstm.model.add(Dropout(dropout, name='dropout_1_' + name_model))
                                            cnn_lstm.model.add(Conv1D(filters=filter,
                                                                      kernel_size=kernel_size,
                                                                      kernel_initializer='glorot_uniform',
                                                                      # kernel_regularizer=regularizers.l2(0.03),
                                                                      padding='valid',
                                                                      activation='relu',
                                                                      name='conv_1_' + name_model))
                                            cnn_lstm.model.add(MaxPooling1D(name='max_pool_1_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                                    dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                                    return_sequences=True, name='lstm_1_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                                    dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                                    return_sequences=True, name='lstm_2_' + name_model))
                                            cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                                    dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                                    name='lstm_3_' + name_model))
                                            cnn_lstm.model.add(Dense(3, activation='sigmoid', name='dense_1_' + name_model))

                                            time_ini_exp = datetime.datetime.now()
                                            exp.generate_model_hypeparams(cnn_lstm, x_train, y_train,
                                                                          x_valid, y_valid, embedding_matrix)
                                            exp.set_period_time_end(time_ini_exp, 'Total experiment')

                del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_test, y_test = exp.pp_data.load_data()

                cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                cnn_lstm.embed_trainable = cnn_lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND,
                                                                                dn.UseEmbedding.NON_STATIC)

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                               str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_name

                for filter in filters_by_layer:
                    for kernel_size in kernels_size:
                        for batch_size in batch_sizes:
                            for epoch in epochs:
                                for dropout in dropouts:
                                    for dropout_lstm in dropouts_lstm:
                                        for neuronios in neuronios_by_lstm_layer:
                                            cnn_lstm.epochs = epoch
                                            cnn_lstm.batch_size = batch_size
                                            cnn_lstm.patience_train = epoch
                                            exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
                                                                  str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
                                                                  '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + str(filter) + \
                                                                  '_LSTM_N' + str(neuronios) + '_D' + str(dropout_lstm) + '_' + we_file_name

                                            cnn_lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                                            exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
                                            exp.save_summary_model(cnn_lstm.model)
                                            exp.predict_samples(cnn_lstm, x_test, y_test)

                del x_test, y_test

    del cnn_lstm, exp
def generate_model(exp, name_model, kernel_function, set_params, function):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'

    optimizer_functions = set_params['optimizer_function']
    neuronios_by_layer = set_params['neuronios_by_layer']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']
    hidden_layers = set_params['hidden_layers']
    dropouts = [0.2]

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()

                lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
                lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND,
                                                                        dn.UseEmbedding.NON_STATIC)

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                               str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_function

                for neuronios in neuronios_by_layer:
                    for batch_size in batch_sizes:
                        for epoch in epochs:
                            for dropout in dropouts:
                                for optmizer_function in optimizer_functions:
                                    for hidden_layer in hidden_layers:
                                        lstm.optmizer_function = dn.OPTIMIZER_FUNCTIONS[optmizer_function]
                                        lstm.epochs = epoch
                                        lstm.batch_size = batch_size
                                        lstm.patience_train = epoch // 2
                                        exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                                              str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_OF' + \
                                                              lstm.optmizer_function + '_HL' + str(hidden_layer) + '_' + we_file_name

                                        lstm.model = Sequential()
                                        lstm.model.add(Embedding(exp.pp_data.vocabulary_size,
                                                                 exp.pp_data.embedding_size,
                                                                 trainable=lstm.embed_trainable,
                                                                 name='emb_' + name_model))

                                        for id_hl in range(hidden_layer):
                                            lstm.model.add(LSTM(neuronios, activation='tanh',
                                                                dropout=dropout, recurrent_dropout=dropout,
                                                                return_sequences=True,
                                                                name='dense_' + str(id_hl) + '_' + name_model))

                                        lstm.model.add(LSTM(neuronios, activation='tanh',
                                                            dropout=dropout, recurrent_dropout=dropout,
                                                            name='dense_' + str(id_hl + 1) + '_' + name_model))
                                        lstm.model.add(Dense(3, activation='sigmoid',
                                                             name='dense_' + str(id_hl + 2) + '_' + name_model))

                                        time_ini_exp = datetime.datetime.now()
                                        exp.generate_model_hypeparams(lstm, x_train, y_train,
                                                                      x_valid, y_valid, embedding_matrix)
                                        exp.set_period_time_end(time_ini_exp, 'Total experiment')

                del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

    for embedding_type in set_params['embedding_types']:
        for embedding_custom_file in set_params['embedding_custom_files']:
            for use_embedding in set_params['use_embeddings']:
                exp.pp_data.embedding_type = embedding_type
                exp.pp_data.word_embedding_custom_file = embedding_custom_file
                exp.pp_data.use_embedding = use_embedding
                exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL

                exp.set_period_time_end(time_ini_rep, 'Load data')
                x_test, y_test = exp.pp_data.load_data()

                lstm.use_embedding_pre_train = exp.pp_data.use_embedding
                lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND,
                                                                        dn.UseEmbedding.NON_STATIC)

                emb_name = function
                if embedding_custom_file != '':
                    emb_name = exp.pp_data.word_embedding_custom_file.split('.')[0]

                we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + \
                               str(exp.pp_data.use_embedding.value) + '_EF_' + emb_name + kernel_function

                for neuronios in neuronios_by_layer:
                    for batch_size in batch_sizes:
                        for epoch in epochs:
                            for dropout in dropouts:
                                for optmizer_function in optimizer_functions:
                                    for hidden_layer in hidden_layers:
                                        lstm.optmizer_function = dn.OPTIMIZER_FUNCTIONS[optmizer_function]
                                        lstm.epochs = epoch
                                        lstm.batch_size = batch_size
                                        lstm.patience_train = epoch // 2
                                        exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                                              str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_OF' + \
                                                              lstm.optmizer_function + '_HL' + str(hidden_layer) + '_' + we_file_name

                                        lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                                        exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
                                        exp.save_summary_model(lstm.model)
                                        exp.predict_samples(lstm, x_test, y_test)

                del x_test, y_test

    del lstm, exp
def generate_model_5(exp, name_model):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.embedding_type = dn.EmbeddingType.WORD2VEC_CUSTOM
    exp.pp_data.use_embedding = dn.UseEmbedding.NON_STATIC
    exp.pp_data.word_embedding_custom_file = 'SMHD-CBOW-AllUsers-300.bin'
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
                   '_EF_' + exp.pp_data.word_embedding_custom_file

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
    lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC)

    neuronios_by_layer = [16]
    epochs = [32]
    batch_sizes = [40]
    dropouts = [0.2]

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = Sequential()
                    lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                             trainable=lstm.embed_trainable, name='emb_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, return_sequences=True,
                                        name='dense_1_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, return_sequences=True,
                                        name='dense_2_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, name='dense_3_' + name_model))
                    lstm.model.add(Dense(1, activation='sigmoid', name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.save_data_format_train(x_test, name_model + '_x')
    exp.save_data_format_train(y_test, name_model + '_y')
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)

    del x_test, y_test, lstm, exp
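# Optional refactoring sketch, not called by the functions in this file: the
# '<model>_lstm_exp9_var_L3_N<neurons>_B<batch>_E<epochs>_D<dropout>_<we_file_name>'
# experiment name above is built by chained string concatenation; a hypothetical helper
# such as this one produces the same pattern with an f-string and is easier to audit for
# missing separators.
def build_lstm_experiment_name(name_model, neuronios, batch_size, epoch, dropout, we_file_name):
    return (f"{name_model}_lstm_exp9_var_L3_N{neuronios}_B{batch_size}"
            f"_E{epoch}_D{dropout}_{we_file_name}")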
def generate_model_ml_le(exp, name_model, set_params):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False  # False = chronological order
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.word_embedding_custom_file = ''
    exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_6B
    exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
                   '_EF_' + 'glove6B300d'

    ## Load model according to the configuration
    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
    lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC)

    # neuronios_by_layer = [16]
    # epochs = [32]
    # batch_sizes = [40]
    # dropouts = [0.2]
    neuronios_by_layer = set_params['neuronios_by_layer']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']
    dropouts = set_params['dropouts']

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = Sequential()
                    lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                             trainable=lstm.embed_trainable, name='emb_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer='lecun_uniform',
                                        recurrent_initializer='orthogonal', activation='tanh',
                                        dropout=dropout, recurrent_dropout=dropout,
                                        return_sequences=True, name='dense_1_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer='lecun_uniform',
                                        recurrent_initializer='orthogonal', activation='tanh',
                                        dropout=dropout, recurrent_dropout=dropout,
                                        return_sequences=True, name='dense_2_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer='lecun_uniform',
                                        recurrent_initializer='orthogonal', activation='tanh',
                                        dropout=dropout, recurrent_dropout=dropout,
                                        name='dense_3_' + name_model))
                    lstm.model.add(Dense(3, kernel_initializer='lecun_uniform', activation='sigmoid',
                                         name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)

    del x_test, y_test, lstm, exp
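# Illustrative sketch only: the functions in this module build Embedding layers without
# weights and hand `embedding_matrix` to exp.generate_model_hypeparams, which is assumed
# to attach the pre-trained vectors. The standard Keras wiring for such a matrix, with
# input_length marking the truncation point, is shown below; every argument here is a
# placeholder rather than a value taken from this project, and the helper is never called.
def _pretrained_embedding_sketch(vocabulary_size, embedding_size, max_len, embedding_matrix, trainable=False):
    from keras.models import Sequential
    from keras.layers import Embedding

    model = Sequential()
    model.add(Embedding(vocabulary_size,
                        embedding_size,
                        weights=[embedding_matrix],  # one pre-trained vector per vocabulary index
                        input_length=max_len,        # inputs padded/truncated to max_len tokens
                        trainable=trainable))        # STATIC -> False; RAND/NON_STATIC -> True
    return model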
def load_submodel_anx(exp, name_model, kernel_name, set_params):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_CUSTOM
    exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
    exp.pp_data.word_embedding_custom_file = 'SMHD-glove-A-D-ADUsers-300.pkl'
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
                   '_EF_' + exp.pp_data.word_embedding_custom_file.split('.')[0] + kernel_name

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
    lstm.embed_trainable = lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND, dn.UseEmbedding.NON_STATIC)

    # neuronios_by_layer = [16]
    # epochs = [96]
    # batch_sizes = [20]
    # dropouts = [0.1]
    neuronios_by_layer = set_params['neuronios_by_layer']
    epochs = set_params['epochs']
    batch_sizes = set_params['batch_sizes']
    dropouts = set_params['dropouts']

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = Sequential()
                    # Embedding(vocabulary_size, embedding_size, input_length=max_len), where max_len
                    # truncates each text after max_len tokens (the _pretrained_embedding_sketch above
                    # illustrates the full wiring).
                    lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                             trainable=lstm.embed_trainable, name='emb_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, return_sequences=True,
                                        name='dense_1_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, return_sequences=True,
                                        name='dense_2_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout,
                                        recurrent_dropout=dropout, name='dense_3_' + name_model))
                    lstm.model.add(Dense(3, activation='sigmoid', name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch // 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' + \
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' + we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)

    del x_test, y_test, lstm, exp
    batch_sizes = [20, 40, 80]

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                exp.experiment_name = 'lstm_exp14_L3' + '_N' + str(neuronios) + '_B' + str(batch_size) + '_E' + str(epoch)
                lstm.epochs = epoch
                lstm.batch_size = batch_size
                lstm.patience_train = epoch // 2

                # Stateful LSTMs need the full batch_input_shape: (batch_size, timesteps, data_dim).
                data_dim = exp.pp_data.max_terms_by_post
                timesteps = exp.pp_data.max_posts

                lstm.model = Sequential()
                lstm.model.add(LSTM(neuronios, activation='tanh', dropout=0.2, recurrent_dropout=0.2,
                                    return_sequences=True, stateful=True,
                                    batch_input_shape=(batch_size, timesteps, data_dim)))
                lstm.model.add(
def generate_model(exp, name_model, kernel_function, set_params):
    # Configure the data pre-processing used for import
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300  # 300 is required when using the Google word2vec neg300 embeddings
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False  # False = chronological order
    exp.pp_data.random_users = False  # Not used; samples are always randomized in the k-fold validation
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_6B
    exp.pp_data.use_embedding = dn.UseEmbedding.STATIC
    exp.pp_data.word_embedding_custom_file = ''
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
    exp.pp_data.type_prediction_label = dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) + \
                   '_EF_' + 'glove6B300d' + kernel_function

    ## Generate data according to the configuration
    cnn_lstm = ModelClass(1)
    cnn_lstm.loss_function = 'binary_crossentropy'
    cnn_lstm.optmizer_function = 'adadelta'
    cnn_lstm.epochs = 15
    cnn_lstm.batch_size = 32
    cnn_lstm.patience_train = 10
    cnn_lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    # Embedding weights are trainable only for RAND or NON_STATIC embeddings.
    cnn_lstm.embed_trainable = cnn_lstm.use_embedding_pre_train in (dn.UseEmbedding.RAND,
                                                                    dn.UseEmbedding.NON_STATIC)

    # set_params is empty: fall back to the default grid
    if not bool(set_params):
        filters_by_layer = [32, 64, 128]
        neuronios_by_lstm_layer = [64, 128, 256]
        dropouts = [0.2, 0.5]
        dropouts_lstm = [0.2, 0.5]
    else:
        filters_by_layer = set_params['filters_by_layer']
        neuronios_by_lstm_layer = set_params['neuronios_by_lstm_layer']
        dropouts = set_params['dropouts']
        dropouts_lstm = set_params['dropouts_lstm']

    kernels_size = [5]
    epochs = [10]
    batch_sizes = [20]

    # Expected input batch shape: (batch_size, timesteps, data_dim).
    # Note that we have to provide the full batch_input_shape since the network is stateful:
    # the sample of index i in batch k is the follow-up for the sample i in batch k-1
    # (see _stateful_lstm_sketch at the end of this module for a minimal example).
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for filter in filters_by_layer:
        for kernel_size in kernels_size:
            for batch_size in batch_sizes:
                for epoch in epochs:
                    for dropout in dropouts:
                        for dropout_lstm in dropouts_lstm:
                            for neuronios in neuronios_by_lstm_layer:
                                cnn_lstm.epochs = epoch
                                cnn_lstm.batch_size = batch_size
                                cnn_lstm.patience_train = epoch
                                exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
                                                      str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
                                                      '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + \
                                                      str(filter) + '_LSTM_N' + str(neuronios) + \
                                                      '_D' + str(dropout_lstm) + '_' + we_file_name

                                cnn_lstm.model = Sequential()
                                cnn_lstm.model.add(Embedding(exp.pp_data.vocabulary_size,
                                                             exp.pp_data.embedding_size,
                                                             trainable=cnn_lstm.embed_trainable,
                                                             name='emb_' + name_model))
                                cnn_lstm.model.add(Dropout(dropout, name='dropout_1_' + name_model))
                                cnn_lstm.model.add(Conv1D(filters=filter,
                                                          kernel_size=kernel_size,
                                                          kernel_initializer='glorot_uniform',
                                                          # kernel_regularizer=regularizers.l2(0.03),
                                                          padding='valid',
                                                          activation='relu',
                                                          name='conv_1_' + name_model))
                                cnn_lstm.model.add(MaxPooling1D(name='max_pool_1_' + name_model))
                                cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                        dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                        return_sequences=True, name='lstm_1_' + name_model))
                                cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                        dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                        return_sequences=True, name='lstm_2_' + name_model))
                                cnn_lstm.model.add(LSTM(neuronios, activation='tanh',
                                                        dropout=dropout_lstm, recurrent_dropout=dropout_lstm,
                                                        name='lstm_3_' + name_model))
                                cnn_lstm.model.add(Dense(3, activation='sigmoid', name='dense_1_' + name_model))

                                time_ini_exp = datetime.datetime.now()
                                exp.generate_model_hypeparams(cnn_lstm, x_train, y_train,
                                                              x_valid, y_valid, embedding_matrix)
                                exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for filter in filters_by_layer:
        for kernel_size in kernels_size:
            for batch_size in batch_sizes:
                for epoch in epochs:
                    for dropout in dropouts:
                        for dropout_lstm in dropouts_lstm:
                            for neuronios in neuronios_by_lstm_layer:
                                cnn_lstm.epochs = epoch
                                cnn_lstm.batch_size = batch_size
                                cnn_lstm.patience_train = epoch
                                exp.experiment_name = name_model + '_cnn_lstm' + '_F' + str(filter) + '_K' + \
                                                      str(kernel_size) + '_P' + 'None' + '_B' + str(batch_size) + \
                                                      '_E' + str(epoch) + '_D' + str(dropout) + '_HLN' + \
                                                      str(filter) + '_LSTM_N' + str(neuronios) + \
                                                      '_D' + str(dropout_lstm) + '_' + we_file_name

                                cnn_lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                                exp.save_geral_configs('Experiment Specific Configuration: ' + exp.experiment_name)
                                exp.save_summary_model(cnn_lstm.model)
                                exp.predict_samples(cnn_lstm, x_test, y_test)

    del x_test, y_test, cnn_lstm, exp
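# Minimal sketch of the stateful-LSTM note above ("Expected input batch shape ..."): a
# stateful LSTM needs the full batch_input_shape up front because the state of sample i
# in batch k feeds sample i in batch k+1. All shapes and layer sizes below are arbitrary
# placeholders, not values from this project, and the helper is never called.
def _stateful_lstm_sketch(batch_size=20, timesteps=100, data_dim=300, units=64):
    from keras.models import Sequential
    from keras.layers import LSTM, Dense

    model = Sequential()
    model.add(LSTM(units,
                   stateful=True,                                        # carry state across batches
                   batch_input_shape=(batch_size, timesteps, data_dim),  # batch size must be fixed
                   return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model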