Example #1
import gc

import numpy

# Project-local imports are omitted in the original snippet; the names below
# are assumed from use: SdA, pretrain_sda_sgd


def pretrain_SdA(corruption_levels, pretraining_epochs, pretraining_pat_epochs,
                 pretrain_lr, pretrain_algo, hidden_layers_sizes,
                 output_folder, base_folder, n_features, n_classes, batch_size,
                 train_seq_len, test_seq_len):
    """
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pretraining

    :type output_folder: string
    :param output_folder: folder for cost and error graphics with the results
    """
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy.random.RandomState(),
              n_ins=n_features,
              n_outs=n_classes,
              hidden_layers_sizes=hidden_layers_sizes,
              corruption_levels=corruption_levels,
              theano_rng=None)

    #########################
    # PRETRAINING THE MODEL #
    #########################

    if pretrain_algo == "sgd":
        pretrained_sda = pretrain_sda_sgd(sda=sda,
                                          pretrain_lr=pretrain_lr,
                                          corruption_levels=corruption_levels,
                                          global_epochs=pretraining_epochs,
                                          pat_epochs=pretraining_pat_epochs,
                                          batch_size=batch_size,
                                          train_seq_len=train_seq_len,
                                          test_seq_len=test_seq_len)
    else:
        # unrecognized pretraining algorithm: return the model unpretrained
        pretrained_sda = sda
    '''
    Conjugate-gradient pretraining and per-layer visualization (disabled;
    this block references names such as train_names and window_size that are
    not in this function's signature):

        pretrained_sda = pretrain_sda_cg(
            sda=sda,
            train_names=train_names,
            window_size=window_size,
            pretraining_epochs=pretraining_epochs,
            corruption_levels=corruption_levels,
            preprocess_algo=pretrain_algo,
            read_window=read_window
        )

        for i in xrange(sda.n_layers):
            visualize_pretraining(
                train_cost=pretrained_sda.dA_layers[i].train_cost_array,
                valid_error=pretrained_sda.dA_layers[i].valid_error_array,
                learning_rate=pretrain_lr,
                corruption_level=corruption_levels[i],
                n_hidden=sda.dA_layers[i].n_hidden,
                da_layer=i,
                datasets_folder=output_folder,
                base_folder=base_folder
            )
    '''
    gc.collect()
    return pretrained_sda
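
A minimal usage sketch for Example #1; the hyperparameter values and the
two-layer shape below are illustrative assumptions, not taken from the
original project:

# hypothetical call; all values are assumptions
pretrained = pretrain_SdA(
    corruption_levels=[0.1, 0.2],    # one noise level per dA layer
    pretraining_epochs=15,
    pretraining_pat_epochs=5,
    pretrain_lr=0.001,
    pretrain_algo="sgd",             # any other value skips pretraining here
    hidden_layers_sizes=[500, 250],
    output_folder="results",
    base_folder=".",
    n_features=75,
    n_classes=7,
    batch_size=20,
    train_seq_len=20,
    test_seq_len=40,
)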
Example #2
import gc
import os
import sys
import timeit

import numpy

# Project-local imports are omitted in the original snippet; the names below
# are assumed from use: SdA, pretrain_sda_sgd, pretrain_sda_cg,
# finetune_sda_cg, visualize_pretraining, visualize_finetuning,
# ICHISeqDataReader, hmm, update_params_on_patient, finish_training,
# change_data_for_one_patient


def pretrain_SdA(train_names,
                 valid_names,
                 read_window,
                 read_algo,
                 read_rank,
                 window_size,
                 corruption_levels,
                 pretraining_epochs,
                 pretraining_pat_epochs,
                 pretrain_lr,
                 pretrain_algo,
                 hidden_layers_sizes,
                 output_folder,
                 base_folder):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.
    This is demonstrated on ICHI.
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer
    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]
    
    :type output_folder: string
    :param output_folder: folder for costand error graphics with results
    """

    n_out = 7  # number of output units
    
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=window_size,
        hidden_layers_sizes=hidden_layers_sizes,
        n_outs=n_out
    )
        
    #########################
    # PRETRAINING THE MODEL #
    #########################
        
    if pretrain_algo == "sgd":
        pretrained_sda = pretrain_sda_sgd(
            sda=sda,
            train_names=train_names,
            valid_names=valid_names,
            read_window=read_window,
            read_algo=read_algo,
            read_rank=read_rank,
            window_size=window_size,
            global_epochs=pretraining_epochs,
            pat_epochs=pretraining_pat_epochs,
            pretrain_lr=pretrain_lr,
            corruption_levels=corruption_levels
        )
    else:
        pretrained_sda = pretrain_sda_cg(
            sda=sda,
            train_names=train_names,
            window_size=window_size,
            pretraining_epochs=pretraining_epochs,
            corruption_levels=corruption_levels,
            preprocess_algo=pretrain_algo,
            read_window=read_window
        )

    for i in xrange(sda.n_layers):
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            valid_error=pretrained_sda.dA_layers[i].valid_error_array,
            window_size=window_size,
            learning_rate=pretrain_lr,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder
        )
    
    gc.collect()
    return sda
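

# The window-based variants slice each signal into overlapping windows of
# length window_size (the same shape[0] - window_size + 1 indexing reappears
# inside train_SdA below). A minimal NumPy sketch of that slicing; the
# function name is illustrative, not part of the original project.
def sliding_windows(signal, window_size):
    # one row per window start: signal.shape[0] - window_size + 1 rows total
    n_times = signal.shape[0] - window_size + 1
    return numpy.array([signal[t: t + window_size] for t in xrange(n_times)])
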
def train_SdA(datasets, train_names,
              output_folder, base_folder,
              window_size,
              corruption_levels,
              pretraining_epochs,
              base,
              pretrain_lr=0):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.
    This is demonstrated on ICHI.
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer
    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]
    
    :type output_folder: string
    :param output_folder: folder for costand error graphics with results
    """

    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # network dimensions
    n_in = window_size*3  # number of input units
    n_out = 7  # number of output units
    
    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_in,
        hidden_layers_sizes=[window_size*2, window_size],
        n_outs=n_out
    )
    # end-snippet-3 start-snippet-4
        
    #########################
    # PRETRAINING THE MODEL #
    #########################
    
    start_time = timeit.default_timer()
    
    pretrained_sda = pretrain_sda_sgd(sda=sda,
                                      train_names=train_names,
                                      window_size=window_size,
                                      pretraining_epochs=pretraining_epochs,
                                      pretrain_lr=pretrain_lr,
                                      corruption_levels=corruption_levels)
    '''
    Conjugate-gradient pretraining (disabled):

    pretrained_sda = pretrain_sda_cg(sda=sda,
                                     train_set_x=train_set_x,
                                     window_size=window_size,
                                     pretraining_epochs=pretraining_epochs,
                                     corruption_levels=corruption_levels)
    '''
    end_time = timeit.default_timer()
    
    for i in xrange(sda.n_layers):
        print('pretrained dA layer %i' % i)
        visualize_pretraining(train_cost=pretrained_sda.dA_layers[i].train_cost_array,
                              window_size=window_size,
                              learning_rate=0,
                              corruption_level=corruption_levels[i],
                              n_hidden=sda.dA_layers[i].n_hidden,
                              da_layer=i,
                              datasets_folder=output_folder,
                              base_folder=base_folder)

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################
                          
    # create matrices for the parameters of the HMM layer
    train_data_names = ['p10a','p011','p013','p014','p020','p022','p040',
                        'p045','p048','p09b','p023','p035','p038', 'p09a','p033']

    n_train_patients = len(train_data_names)

    n_visible = pow(base, sda.da_layers_output_size)
    n_hidden = n_out
        
    train_reader = ICHISeqDataReader(train_data_names)
    
    pi_values = numpy.zeros((n_hidden,))
    a_values = numpy.zeros((n_hidden, n_hidden)) 
    b_values = numpy.zeros((n_hidden, n_visible))
    array_from_hidden = numpy.zeros((n_hidden,))

    for train_patient in xrange(n_train_patients):
        # get the patient's data divided into sequences with respect to labels
        train_set_x, train_set_y = train_reader.read_next_doc()
        train_x_array = train_set_x.get_value()
        n_train_times = train_x_array.shape[0] - window_size + 1
        train_visible_after_sda = numpy.array([sda.get_da_output(
                train_x_array[time: time+window_size]).ravel()
                for time in xrange(n_train_times)]).ravel()
                            
        new_train_visible, new_train_hidden = change_data_for_one_patient(
            hiddens_patient=train_set_y.eval(),
            visibles_patient=train_visible_after_sda,
            window_size=sda.da_layers_output_size,
            base_for_labels=base
        )
        
        pi_values, a_values, b_values, array_from_hidden = update_params_on_patient(
            pi_values=pi_values,
            a_values=a_values,
            b_values=b_values,
            array_from_hidden=array_from_hidden,
            hiddens_patient=new_train_hidden,
            visibles_patient=new_train_visible,
            n_hidden=n_hidden
        )
        
        gc.collect()
        
    pi_values, a_values, b_values = finish_training(
        pi_values=pi_values,
        a_values=a_values,
        b_values=b_values,
        array_from_hidden=array_from_hidden,
        n_hidden=n_hidden,
        n_patients=n_train_patients
    )
    
    hmm_model = hmm.MultinomialHMM(
        n_components=n_hidden,
        startprob=pi_values,
        transmat=a_values
    )
    
    hmm_model.n_symbols = n_visible
    hmm_model.emissionprob_ = b_values
    gc.collect()
    print('MultinomialHMM created')
    
    sda.set_hmm_layer(
        hmm_model=hmm_model
    )
    return sda
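

# The finetuning loop in train_SdA accumulates HMM statistics patient by
# patient, and finish_training is assumed to normalize them afterwards. The
# helper update_params_on_patient is project-local; the count-based sketch
# below is an assumption about its behavior, not the actual implementation.
def update_params_on_patient(pi_values, a_values, b_values, array_from_hidden,
                             hiddens_patient, visibles_patient, n_hidden):
    # accumulate raw counts; normalization is left to finish_training
    pi_values[hiddens_patient[0]] += 1                    # initial-state count
    for prev, cur in zip(hiddens_patient[:-1], hiddens_patient[1:]):
        a_values[prev, cur] += 1                          # transition counts
    for state, symbol in zip(hiddens_patient, visibles_patient):
        b_values[state, symbol] += 1                      # emission counts
        array_from_hidden[state] += 1                     # per-state totals
    # n_hidden is kept only to match the call site in train_SdA
    return pi_values, a_values, b_values, array_from_hidden
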
def test_SdA(
    datasets,
    output_folder,
    base_folder,
    window_size,
    corruption_levels,
    pretraining_epochs,
    training_epochs,
    pretrain_lr=0,
    finetune_lr=0,
):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.
    This is demonstrated on ICHI.
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer
    :type datasets: array
    :param datasets: [train_set, valid_set, test_set]
    
    :type output_folder: string
    :param output_folder: folder for costand error graphics with results
    """

    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # network dimensions
    n_in = window_size * 3  # number of input units
    n_out = 7  # number of output units

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print "... building the model"
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=n_in, hidden_layers_sizes=[window_size * 2, window_size], n_outs=n_out)
    # end-snippet-3 start-snippet-4

    #########################
    # PRETRAINING THE MODEL #
    #########################

    start_time = timeit.default_timer()

    pretrained_sda = pretrain_sda_sgd(
        sda=sda,
        train_set_x=train_set_x,
        window_size=window_size,
        pretraining_epochs=pretraining_epochs,
        pretrain_lr=pretrain_lr,
        corruption_levels=corruption_levels,
    )
    """

    pretrained_sda = pretrain_sda_cg(sda=sda,
                                  train_set_x=train_set_x,
                                  window_size=window_size,
                                  pretraining_epochs=pretraining_epochs,
                                  corruption_levels=corruption_levels)
    """
    end_time = timeit.default_timer()

    for i in xrange(sda.n_layers):
        print("pretrained dA layer %i" % i)
        visualize_pretraining(
            train_cost=pretrained_sda.dA_layers[i].train_cost_array,
            window_size=window_size,
            learning_rate=0,
            corruption_level=corruption_levels[i],
            n_hidden=sda.dA_layers[i].n_hidden,
            da_layer=i,
            datasets_folder=output_folder,
            base_folder=base_folder,
        )

    print >> sys.stderr, (
        "The pretraining code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    )
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################
    start_time = timeit.default_timer()
    """
    finetuned_sda = finetune_sda_sgd(sda=pretrained_sda,
                                    datasets=datasets,
                                    window_size=window_size,
                                    finetune_lr=finetune_lr,
                                    training_epochs=training_epochs)
    """
    finetuned_sda = finetune_sda_cg(
        sda=pretrained_sda, datasets=datasets, window_size=window_size, training_epochs=training_epochs
    )

    end_time = timeit.default_timer()

    visualize_finetuning(
        train_cost=finetuned_sda.logLayer.train_cost_array,
        train_error=finetuned_sda.logLayer.train_error_array,
        valid_error=finetuned_sda.logLayer.valid_error_array,
        test_error=finetuned_sda.logLayer.test_error_array,
        window_size=window_size,
        learning_rate=0,
        datasets_folder=output_folder,
        base_folder=base_folder,
    )
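
A hedged usage sketch for test_SdA: the random placeholder data and all
hyperparameter values are assumptions; only the [train, valid, test]
structure and the window_size * 3 input width come from the code above.

import numpy
import theano

rng = numpy.random.RandomState(0)

def random_split(n, n_in, n_classes):
    # placeholder data wrapped as Theano shared variables, tutorial-style
    x = numpy.asarray(rng.rand(n, n_in), dtype=theano.config.floatX)
    y = numpy.asarray(rng.randint(0, n_classes, size=n), dtype='int32')
    return (theano.shared(x), theano.shared(y))

window_size = 30
n_in = window_size * 3   # matches n_in computed inside test_SdA
datasets = [random_split(1000, n_in, 7),   # train
            random_split(200, n_in, 7),    # valid
            random_split(200, n_in, 7)]    # test

test_SdA(datasets=datasets,
         output_folder="results",
         base_folder=".",
         window_size=window_size,
         corruption_levels=[0.1, 0.2],
         pretraining_epochs=15,
         training_epochs=100,
         pretrain_lr=0.001,
         finetune_lr=0.1)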