import gc
import os
import sys
import timeit

import numpy

# NOTE: SdA, ICHISeqDataReader, pretrain_sda_sgd, pretrain_sda_cg,
# finetune_sda_cg, finetune_sda_sgd, visualize_pretraining,
# visualize_finetuning, change_data_for_one_patient,
# update_params_on_patient and finish_training are project-local helpers;
# they are assumed to be imported from the repository's own modules,
# which are not shown here.
from hmmlearn import hmm  # assumes the old API with startprob/transmat
                          # constructor arguments (as in sklearn.hmm)


def pretrain_SdA(corruption_levels, pretraining_epochs, pretraining_pat_epochs,
                 pretrain_lr, pretrain_algo, hidden_layers_sizes,
                 output_folder, base_folder, n_features, n_classes,
                 batch_size, train_seq_len, test_seq_len):
    """
    Pretrain a stacked denoising autoencoder.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with results
    """
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy.random.RandomState(),
        n_ins=n_features,
        n_outs=n_classes,
        hidden_layers_sizes=hidden_layers_sizes,
        corruption_levels=corruption_levels,
        theano_rng=None
    )

    #########################
    # PRETRAINING THE MODEL #
    #########################
    if pretrain_algo == "sgd":
        pretrained_sda = pretrain_sda_sgd(
            sda=sda,
            pretrain_lr=pretrain_lr,
            corruption_levels=corruption_levels,
            global_epochs=pretraining_epochs,
            pat_epochs=pretraining_pat_epochs,
            batch_size=batch_size,
            train_seq_len=train_seq_len,
            test_seq_len=test_seq_len
        )
    else:
        pretrained_sda = sda
    '''
    pretrained_sda = pretrain_sda_cg(
        sda=sda,
        train_names=train_names,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        corruption_levels=corruption_levels,
        preprocess_algo=pretrain_algo,
        read_window=read_window
    )
    for i in xrange(sda.n_layers):
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            valid_error=pretrained_sda.dA_layers[i].valid_error_array,
            learning_rate=pretrain_lr,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder
        )
    '''
    gc.collect()
    return pretrained_sda
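# For reference, a minimal numpy sketch of the single-layer denoising
# autoencoder update that the layer-wise pretraining above repeats for each
# dA layer: corrupt the input, reconstruct through tied weights, and take
# one SGD step on the mean cross-entropy reconstruction cost. This only
# illustrates the technique; it is not the repository's Theano
# implementation, and the helper name is hypothetical.
def da_sgd_step(x, W, b_hid, b_vis, corruption_level, lr, rng):
    """One SGD step of a tied-weight denoising autoencoder on a batch x."""
    def sigmoid(z):
        return 1.0 / (1.0 + numpy.exp(-z))

    # corrupt: zero out a random fraction of the inputs
    mask = rng.binomial(1, 1.0 - corruption_level, size=x.shape)
    x_tilde = x * mask

    h = sigmoid(numpy.dot(x_tilde, W) + b_hid)   # encode
    z = sigmoid(numpy.dot(h, W.T) + b_vis)       # decode with tied weights

    # gradients of the mean cross-entropy reconstruction cost
    dz = (z - x) / x.shape[0]
    dh = numpy.dot(dz, W) * h * (1.0 - h)
    dW = numpy.dot(x_tilde.T, dh) + numpy.dot(dz.T, h)

    W -= lr * dW
    b_hid -= lr * dh.sum(axis=0)
    b_vis -= lr * dz.sum(axis=0)
    return W, b_hid, b_vis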
def pretrain_SdA(train_names, valid_names, read_window, read_algo, read_rank,
                 window_size, corruption_levels, pretraining_epochs,
                 pretraining_pat_epochs, pretrain_lr, pretrain_algo,
                 hidden_layers_sizes, output_folder, base_folder):
    """
    Pretrain a stacked denoising autoencoder on the ICHI data, reading the
    patient records named in train_names and valid_names.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with results
    """
    n_out = 7  # number of output units

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)

    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=window_size,
        hidden_layers_sizes=hidden_layers_sizes,
        n_outs=n_out
    )

    #########################
    # PRETRAINING THE MODEL #
    #########################
    if pretrain_algo == "sgd":
        pretrained_sda = pretrain_sda_sgd(
            sda=sda,
            train_names=train_names,
            valid_names=valid_names,
            read_window=read_window,
            read_algo=read_algo,
            read_rank=read_rank,
            window_size=window_size,
            global_epochs=pretraining_epochs,
            pat_epochs=pretraining_pat_epochs,
            pretrain_lr=pretrain_lr,
            corruption_levels=corruption_levels
        )
    else:
        pretrained_sda = pretrain_sda_cg(
            sda=sda,
            train_names=train_names,
            window_size=window_size,
            pretraining_epochs=pretraining_epochs,
            corruption_levels=corruption_levels,
            preprocess_algo=pretrain_algo,
            read_window=read_window
        )

    for i in xrange(sda.n_layers):
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            valid_error=pretrained_sda.dA_layers[i].valid_error_array,
            window_size=window_size,
            learning_rate=pretrain_lr,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder
        )
    gc.collect()
    return sda
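# A hypothetical invocation of the reader-based pretrain_SdA above. The
# patient file names follow the pattern used elsewhere in this module, but
# every value here (window size, layer sizes, read_algo, learning rate) is
# an illustrative assumption, not a setting taken from the repository.
def demo_pretrain_sda():
    return pretrain_SdA(
        train_names=['p10a', 'p011'],
        valid_names=['p013'],
        read_window=1,
        read_algo='filter',   # placeholder; valid values depend on the reader
        read_rank=10,
        window_size=30,
        corruption_levels=[0.1, 0.2],
        pretraining_epochs=15,
        pretraining_pat_epochs=5,
        pretrain_lr=0.01,
        pretrain_algo='sgd',
        hidden_layers_sizes=[60, 30],
        output_folder='output',
        base_folder='sda_plots'
    )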
def train_SdA(datasets, train_names, output_folder, base_folder,
              window_size, corruption_levels, pretraining_epochs,
              base, pretrain_lr=0):
    """
    Train a stacked denoising autoencoder on the ICHI data: layer-wise
    pretraining followed by fitting a multinomial HMM output layer.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with results
    """
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    n_in = window_size * 3  # number of input units
    n_out = 7  # number of output units

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_in,
        hidden_layers_sizes=[window_size * 2, window_size],
        n_outs=n_out
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    start_time = timeit.default_timer()
    pretrained_sda = pretrain_sda_sgd(
        sda=sda,
        train_names=train_names,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        pretrain_lr=pretrain_lr,
        corruption_levels=corruption_levels
    )
    '''
    pretrained_sda = pretrain_sda_cg(
        sda=sda,
        train_set_x=train_set_x,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        corruption_levels=corruption_levels
    )
    '''
    end_time = timeit.default_timer()

    for i in xrange(sda.n_layers):
        print 'pretrained layer %d' % i
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            window_size=window_size,
            learning_rate=0,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder
        )

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################
    # create matrices for params of HMM layer
    train_data_names = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022',
                        'p040', 'p045', 'p048', 'p09b', 'p023', 'p035',
                        'p038', 'p09a', 'p033']
    n_train_patients = len(train_data_names)

    n_visible = pow(base, sda.da_layers_output_size)
    n_hidden = n_out

    train_reader = ICHISeqDataReader(train_data_names)

    pi_values = numpy.zeros((n_hidden,))
    a_values = numpy.zeros((n_hidden, n_hidden))
    b_values = numpy.zeros((n_hidden, n_visible))
    array_from_hidden = numpy.zeros((n_hidden,))

    for train_patient in xrange(n_train_patients):
        # get data divided on sequences with respect to labels
        train_set_x, train_set_y = train_reader.read_next_doc()
        train_x_array = train_set_x.get_value()
        n_train_times = train_x_array.shape[0] - window_size + 1
        train_visible_after_sda = numpy.array(
            [sda.get_da_output(train_x_array[time: time + window_size]).ravel()
             for time in xrange(n_train_times)]
        ).ravel()
        new_train_visible, new_train_hidden = change_data_for_one_patient(
            hiddens_patient=train_set_y.eval(),
            visibles_patient=train_visible_after_sda,
            window_size=sda.da_layers_output_size,
            base_for_labels=base
        )
        pi_values, a_values, b_values, array_from_hidden = update_params_on_patient(
            pi_values=pi_values,
            a_values=a_values,
            b_values=b_values,
            array_from_hidden=array_from_hidden,
            hiddens_patient=new_train_hidden,
            visibles_patient=new_train_visible,
            n_hidden=n_hidden
        )
        gc.collect()

    pi_values, a_values, b_values = finish_training(
        pi_values=pi_values,
        a_values=a_values,
        b_values=b_values,
        array_from_hidden=array_from_hidden,
        n_hidden=n_hidden,
        n_patients=n_train_patients
    )

    hmm_model = hmm.MultinomialHMM(
        n_components=n_hidden,
        startprob=pi_values,
        transmat=a_values
    )
    hmm_model.n_symbols = n_visible
    hmm_model.emissionprob_ = b_values
    gc.collect()
    print('MultinomialHMM created')

    sda.set_hmm_layer(hmm_model=hmm_model)
    return sda
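# A compact numpy sketch of the count-based HMM estimation that
# update_params_on_patient / finish_training appear to implement above:
# accumulate start, transition, and emission counts over the labelled
# sequences, then normalize the counts into probabilities. The helper is
# hypothetical and only illustrates the technique; rows for states that
# never occur are simply left at zero here.
def estimate_hmm_params(hidden_seqs, visible_seqs, n_hidden, n_visible):
    pi = numpy.zeros(n_hidden)
    a = numpy.zeros((n_hidden, n_hidden))
    b = numpy.zeros((n_hidden, n_visible))
    for hiddens, visibles in zip(hidden_seqs, visible_seqs):
        pi[hiddens[0]] += 1                       # initial-state count
        for t in xrange(len(hiddens) - 1):
            a[hiddens[t], hiddens[t + 1]] += 1    # transition counts
        for h, v in zip(hiddens, visibles):
            b[h, v] += 1                          # emission counts
    # normalize each count table into a stochastic vector / matrix
    pi /= max(pi.sum(), 1.0)
    a /= numpy.maximum(a.sum(axis=1, keepdims=True), 1.0)
    b /= numpy.maximum(b.sum(axis=1, keepdims=True), 1.0)
    return pi, a, b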
def test_SdA(datasets, output_folder, base_folder, window_size,
             corruption_levels, pretraining_epochs, training_epochs,
             pretrain_lr=0, finetune_lr=0):
    """
    Demonstrates how to train and test a stacked denoising autoencoder
    on the ICHI data.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with results
    """
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    n_in = window_size * 3  # number of input units
    n_out = 7  # number of output units

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print "... building the model"
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_in,
        hidden_layers_sizes=[window_size * 2, window_size],
        n_outs=n_out
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    start_time = timeit.default_timer()
    pretrained_sda = pretrain_sda_sgd(
        sda=sda,
        train_set_x=train_set_x,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        pretrain_lr=pretrain_lr,
        corruption_levels=corruption_levels,
    )
    """
    pretrained_sda = pretrain_sda_cg(
        sda=sda,
        train_set_x=train_set_x,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        corruption_levels=corruption_levels
    )
    """
    end_time = timeit.default_timer()

    for i in xrange(sda.n_layers):
        print "pretrained layer %d" % i
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            window_size=window_size,
            learning_rate=0,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder,
        )

    print >> sys.stderr, (
        "The pretraining code for file " +
        os.path.split(__file__)[1] +
        " ran for %.2fm" % ((end_time - start_time) / 60.0)
    )
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################
    start_time = timeit.default_timer()
    """
    finetuned_sda = finetune_sda_sgd(
        sda=pretrained_sda,
        datasets=datasets,
        window_size=window_size,
        finetune_lr=finetune_lr,
        training_epochs=training_epochs
    )
    """
    finetuned_sda = finetune_sda_cg(
        sda=pretrained_sda,
        datasets=datasets,
        window_size=window_size,
        training_epochs=training_epochs
    )
    end_time = timeit.default_timer()

    visualize_finetuning(
        train_cost=finetuned_sda.logLayer.train_cost_array,
        train_error=finetuned_sda.logLayer.train_error_array,
        valid_error=finetuned_sda.logLayer.valid_error_array,
        test_error=finetuned_sda.logLayer.test_error_array,
        window_size=window_size,
        learning_rate=0,
        datasets_folder=output_folder,
        base_folder=base_folder,
    )
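# A hypothetical driver for test_SdA, sketched under the assumption that the
# datasets are Theano shared variables as in the standard SdA tutorial setup.
# The random data, folder names, and all hyperparameter values below are
# illustrative assumptions, not settings taken from the repository.
def demo_test_sda():
    import theano

    def share(x, y):
        # wrap one split as (shared features, shared int labels)
        return (theano.shared(numpy.asarray(x, dtype=theano.config.floatX)),
                theano.shared(numpy.asarray(y, dtype='int32')))

    rng = numpy.random.RandomState(0)
    window_size = 30
    datasets = [share(rng.rand(100, window_size * 3),
                      rng.randint(0, 7, 100))
                for _ in xrange(3)]
    test_SdA(
        datasets=datasets,
        output_folder='output',
        base_folder='sda_plots',
        window_size=window_size,
        corruption_levels=[0.1, 0.2],
        pretraining_epochs=15,
        training_epochs=100,
        pretrain_lr=0.01,
        finetune_lr=0.1
    )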