Python xavier_init Examples

Programming Language: Python

Namespace/Package Name: utilsnn

Method/Function: xavier_init

Examples at hotexamples.com: 8

Python xavier_init - 8 examples found. These are the top rated real world Python examples of utilsnn.xavier_init extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: AEFFNN.py Project: Scottnan/rbm

    def def_network(self, lr):
        """Build the encoder stack plus feed-forward classifier and open a session.

        One sigmoid layer is created per entry of ``self.layer_sizes`` on top
        of ``self.x``; its weights and biases are appended to
        ``self.encoding_matrices`` / ``self.encoding_biases``.  The encoded
        tensor then passes through a linear layer of width ``self.FFNN_layer``
        and a linear output layer of width ``self.n_class``.  The mean softmax
        cross-entropy between the output logits and ``self.y`` is minimised
        with Adam, and a new session is created with all variables initialised.

        Args:
            lr: learning rate handed to ``tf.train.AdamOptimizer``.
        """
        next_layer_input = self.x
        for dim in self.layer_sizes:
            input_dim = int(next_layer_input.get_shape()[1])
            W = tf.Variable(xavier_init(input_dim, dim, const=1.0),
                            dtype=tf.float32)
            b = tf.Variable(tf.zeros([dim]), dtype=tf.float32)
            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)
            next_layer_input = tf.nn.sigmoid(
                tf.matmul(next_layer_input, W) + b)
        self.encoded_x = next_layer_input

        # FFNN head.
        # The two weight matrices must NOT both start at zero: with W == 0 and
        # b == 0 the hidden activation y_ is identically zero, which makes the
        # gradient of W_out zero, and W_out == 0 in turn makes the gradients of
        # W, b and the whole encoder zero.  Only b_out could ever be trained.
        # Xavier initialisation (same helper as the encoder) breaks that
        # deadlock; the biases can safely stay at zero.
        self.W = tf.Variable(
            xavier_init(self.layer_sizes[-1], self.FFNN_layer, const=1.0),
            dtype=tf.float32,
            name='Weight_FFNN')
        self.b = tf.Variable(tf.zeros([self.FFNN_layer], np.float32),
                             name='bias_FFNN')
        self.W_out = tf.Variable(
            xavier_init(self.FFNN_layer, self.n_class, const=1.0),
            dtype=tf.float32,
            name='Weight_output')
        self.b_out = tf.Variable(tf.zeros([self.n_class], np.float32),
                                 name='bias_output')

        # Both head layers are linear; the softmax is applied inside the loss.
        self.y_ = tf.matmul(self.encoded_x, self.W) + self.b
        self.y_logits = tf.matmul(self.y_, self.W_out) + self.b_out
        # Expose the logits through a graph collection under 'pred_network'.
        tf.add_to_collection('pred_network', self.y_logits)

        # compute cost
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.y_logits,
                                                    labels=self.y,
                                                    name='cross_entropy'))
        self.optimizer = tf.train.AdamOptimizer(lr).minimize(self.cost)

        # Initialise only after minimize(), so the optimizer's own variables
        # are covered by the initializer as well.
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

Example #2

Show file

File: au.py Project: Cospel/rbm-ae-tf

    def __init__(self, input_size, layer_sizes, layer_names, tied_weights=False, optimizer=tf.train.AdamOptimizer(),
                 transfer_function=tf.nn.sigmoid):
        """Build a stacked autoencoder graph and start a session for it.

        Args:
            input_size: width of the input placeholder ``self.x``.
            layer_sizes: output width of each encoding layer, in order.  The
                sequence is only read; it is not modified.
            layer_names: one entry per encoding layer; index 0 is used as the
                weight variable name and index 1 as the bias variable name.
            tied_weights: if true, each decoding matrix is the transpose of
                the matching encoding matrix instead of a new variable.
            optimizer: optimizer whose ``minimize`` is applied to the cost.
            transfer_function: activation applied after every layer; it is
                also forwarded to ``xavier_init``.
        """
        self.layer_names = layer_names
        self.tied_weights = tied_weights

        # Build the encoding layers
        self.x = tf.placeholder("float", [None, input_size])
        next_layer_input = self.x

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        for i, dim in enumerate(layer_sizes):
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function), name=layer_names[i][0])

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1])

            # Keep W around: with tied weights the decoder reuses it.
            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            # the input into the next layer is the output of this layer
            next_layer_input = transfer_function(tf.matmul(next_layer_input, W) + b)

        # The fully encoded x value is now stored in the next_layer_input
        self.encoded_x = next_layer_input

        # Build the reconstruction layers by walking the encoder back to
        # front.  Reversed *copies* are used so that the caller's
        # ``layer_sizes`` list is not left reversed after construction.
        reversed_sizes = list(layer_sizes)[::-1]
        reversed_encoders = self.encoding_matrices[::-1]
        decoder_dims = reversed_sizes[1:] + [int(self.x.get_shape()[1])]

        self.decoding_matrices = []
        self.decoding_biases = []

        for i, dim in enumerate(decoder_dims):
            encoder_W = reversed_encoders[i]
            if tied_weights:
                # tied weights: look up the encoding matrix for this step and transpose it
                W = tf.identity(tf.transpose(encoder_W))
            else:
                enc_shape = encoder_W.get_shape()
                W = tf.Variable(xavier_init(enc_shape[1].value, enc_shape[0].value, transfer_function))
            b = tf.Variable(tf.zeros([dim]))
            self.decoding_matrices.append(W)
            self.decoding_biases.append(b)

            next_layer_input = transfer_function(tf.matmul(next_layer_input, W) + b)

        # Store the decoding matrices in encoder order (needed for loading
        # weights).  self.encoding_matrices was never reordered, and
        # self.decoding_biases stays in construction order as before.
        self.decoding_matrices.reverse()

        # the fully encoded and reconstructed value of x is here:
        self.reconstructed_x = next_layer_input

        # compute cost: root of the mean squared reconstruction error
        self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.reconstructed_x)))
        self.optimizer = optimizer.minimize(self.cost)

        # initialize variables
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

Example #3

Show file

File: au.py Project: xjump/rbm-ae-tf

    def __init__(self,
                 input_size,
                 layer_sizes,
                 layer_names,
                 tied_weights=False,
                 optimizer=tf.train.AdamOptimizer(),
                 transfer_function=tf.nn.sigmoid):
        """Build a stacked autoencoder graph and start a session for it.

        Args:
            input_size: width of the input placeholder ``self.x``.
            layer_sizes: output width of each encoding layer, in order.  The
                sequence is only read; it is not modified.
            layer_names: one entry per encoding layer; index 0 is used as the
                weight variable name and index 1 as the bias variable name.
            tied_weights: if true, each decoding matrix is the transpose of
                the matching encoding matrix instead of a new variable.
            optimizer: optimizer whose ``minimize`` is applied to the cost.
            transfer_function: activation applied after every layer; it is
                also forwarded to ``xavier_init``.
        """
        self.layer_names = layer_names
        self.tied_weights = tied_weights

        # Build the encoding layers
        self.x = tf.placeholder("float", [None, input_size])
        next_layer_input = self.x

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        for i, dim in enumerate(layer_sizes):
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function),
                            name=layer_names[i][0])

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1])

            # Keep W around: with tied weights the decoder reuses it.
            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            # the input into the next layer is the output of this layer
            next_layer_input = transfer_function(
                tf.matmul(next_layer_input, W) + b)

        # The fully encoded x value is now stored in the next_layer_input
        self.encoded_x = next_layer_input

        # Build the reconstruction layers by walking the encoder back to
        # front.  Reversed *copies* are used so that the caller's
        # ``layer_sizes`` list is not left reversed after construction.
        reversed_sizes = list(layer_sizes)[::-1]
        reversed_encoders = self.encoding_matrices[::-1]
        decoder_dims = reversed_sizes[1:] + [int(self.x.get_shape()[1])]

        self.decoding_matrices = []
        self.decoding_biases = []

        for i, dim in enumerate(decoder_dims):
            encoder_W = reversed_encoders[i]
            if tied_weights:
                # tied weights: look up the encoding matrix for this step and
                # transpose it
                W = tf.identity(tf.transpose(encoder_W))
            else:
                enc_shape = encoder_W.get_shape()
                W = tf.Variable(
                    xavier_init(enc_shape[1].value,
                                enc_shape[0].value,
                                transfer_function))
            b = tf.Variable(tf.zeros([dim]))
            self.decoding_matrices.append(W)
            self.decoding_biases.append(b)

            next_layer_input = transfer_function(
                tf.matmul(next_layer_input, W) + b)

        # Store the decoding matrices in encoder order (needed for loading
        # weights).  self.encoding_matrices was never reordered, and
        # self.decoding_biases stays in construction order as before.
        self.decoding_matrices.reverse()

        # the fully encoded and reconstructed value of x is here:
        self.reconstructed_x = next_layer_input

        # compute cost: root of the mean squared reconstruction error
        self.cost = tf.sqrt(
            tf.reduce_mean(tf.square(self.x - self.reconstructed_x)))
        self.optimizer = optimizer.minimize(self.cost)

        # initialize variables
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

Example #4

Show file

File: AutoEncoder.py Project: vlasta-kus/rbm-tf

    def __init__(self,
                 architecture,
                 layer_names,
                 tied_weights=False,
                 optimizer=tf.train.AdamOptimizer()):
        """Assemble the autoencoder graph described by ``architecture``.

        ``architecture`` is a list of dicts.  Element 0 supplies the input
        width through its ``'nodes'`` entry; every further element describes
        one hidden layer with a ``'nodes'`` width and an optional
        ``'activation'`` callable (sigmoid when absent).  ``layer_names[i]``
        holds the variable names of hidden layer ``i``: index 0 for the
        weights, index 1 for the bias.

        With ``tied_weights`` the decoder uses transposed encoder matrices;
        otherwise it gets its own Xavier-initialised variables.  The cost is
        the mean of ``self.crossEntropy(reconstruction, input)`` and is
        minimised with ``optimizer``.  A session is opened, all variables are
        initialised and a summary writer on ``log/`` is created.
        """
        DEFAULT_ACTIVATION = tf.nn.sigmoid

        def activation_of(spec):
            # Per-layer activation; falls back to sigmoid when none is given.
            if 'activation' in spec:
                return spec['activation']
            return DEFAULT_ACTIVATION

        self.layer_names = layer_names
        self.tied_weights = tied_weights

        self.datetime = "000"  # fixed placeholder tag
        self.step = 0

        assert len(architecture[1:]) == len(layer_names)

        # Input placeholder, sized by the first architecture entry.
        self.x = tf.placeholder("float", [None, architecture[0]['nodes']],
                                name="x_in")
        current = self.x

        # ---- encoder (hidden layers) ----
        self.encoding_matrices = []
        self.encoding_biases = []
        for idx, spec in enumerate(architecture[1:]):
            fan_in = int(current.get_shape()[1])
            fan_out = spec['nodes']
            activation = activation_of(spec)

            # Xavier-initialised weights, zero bias.
            weights = tf.Variable(initial_value=xavier_init(fan_in, fan_out,
                                                            activation),
                                  name=layer_names[idx][0])
            bias = tf.Variable(tf.zeros([fan_out]), name=layer_names[idx][1])

            # Remember the variables; tied decoders look the weights up later.
            self.encoding_matrices.append(weights)
            self.encoding_biases.append(bias)

            # This layer's output feeds the next layer.
            current = activation(tf.matmul(current, weights) + bias)

        # At this point ``current`` is the fully encoded input.
        self.encoded_x = current

        # ---- decoder: walk the architecture in reverse ----
        architecture.reverse()
        self.encoding_matrices.reverse()

        self.decoding_matrices = []
        self.decoding_biases = []

        for idx, spec in enumerate(architecture[1:]):
            activation = activation_of(spec)
            mirrored = self.encoding_matrices[idx]
            if tied_weights:
                # Tied weights: reuse the transposed encoder matrix.
                weights = tf.identity(tf.transpose(mirrored))
            else:
                mirrored_shape = mirrored.get_shape()
                weights = tf.Variable(
                    xavier_init(mirrored_shape[1].value,
                                mirrored_shape[0].value,
                                activation))
            bias = tf.Variable(tf.zeros([spec['nodes']]))
            self.decoding_matrices.append(weights)
            self.decoding_biases.append(bias)

            current = activation(tf.matmul(current, weights) + bias)

        # Undo the in-place reversals: the matrix lists are put in encoder
        # order for loading weights, and the caller's architecture list gets
        # its original order back.
        self.encoding_matrices.reverse()
        self.decoding_matrices.reverse()
        architecture.reverse()

        # ``current`` now holds the reconstruction of the input.
        self.reconstructed_x = current

        # ---- cost and optimizer ----
        self.total_updates = tf.Variable(0, trainable=False)
        per_example_loss = self.crossEntropy(self.reconstructed_x, self.x)
        self.cost = tf.reduce_mean(per_example_loss)
        self.optimizer = optimizer.minimize(self.cost,
                                            global_step=self.total_updates)

        # ---- extra metrics: MSE and cosine similarity ----
        self.mse = tf.losses.mean_squared_error(self.reconstructed_x, self.x)
        # NOTE: this rebinds the instance attribute ``cosSim`` from the method
        # to the tensor the method returns.
        self.cosSim = self.cosSim(self.reconstructed_x, self.x)

        # Scalar summaries, grouped under the "Finetuning" name scope.
        with tf.name_scope("Finetuning"):
            self.summary_cost = tf.summary.scalar('cost', self.cost)
            self.summary_mse = tf.summary.scalar('MSE', self.mse)
            self.summary_cossim = tf.summary.scalar('cosine_similarity',
                                                    self.cosSim)

        # ---- session start-up ----
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

        # Writer used for TensorBoard logs.
        self.logger = tf.summary.FileWriter("log/", self.sess.graph)

Example #5

Show file

File: au.py Project: italoarruda/deep-learning-projects

    def __init__(self,
                 input_size,
                 layer_sizes,
                 layer_names,
                 tied_weights=False,
                 keep_prob=1,
                 optimizer=tf.train.AdamOptimizer(),
                 transfer_function_enc=tf.nn.relu,
                 transfer_function_dec=tf.nn.sigmoid,
                 l2reg=5e-4,
                 regtype='none',
                 loss_func='mean_squared'):
        """Build a stacked autoencoder with dropout and start a session.

        Args:
            input_size: width of the input placeholder ``self.x``.
            layer_sizes: output width of each encoding layer, in order.  The
                sequence is only read; it is not modified.
            layer_names: one entry per encoding layer; index 0 is used as the
                weight variable name and index 1 as the bias variable name.
            tied_weights: if true, each decoding matrix is the transpose of
                the matching encoding matrix instead of a new variable.
            keep_prob: stored as ``self.keep_prob_value``; the graph itself
                reads the dropout rate from the ``self.keep_prob`` placeholder.
            optimizer: optimizer whose ``minimize`` is applied to the cost.
            transfer_function_enc: activation of the encoding layers.
            transfer_function_dec: activation of the decoding layers.
            l2reg: stored as ``self.l2reg``.
            regtype: stored as ``self.regtype``.
            loss_func: ``'cross_entropy'``, ``'softmax_cross_entropy'``, or
                anything else for the root-mean-squared error.
        """
        self.layer_names = layer_names
        self.tied_weights = tied_weights
        self.keep_prob = tf.placeholder(tf.float32)
        self.keep_prob_value = keep_prob
        self.l2reg = l2reg
        self.regtype = regtype
        self.loss_func = loss_func

        # Build the encoding layers
        self.x = tf.placeholder("float", [None, input_size])
        next_layer_input = self.x

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        for i, dim in enumerate(layer_sizes):
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function_enc),
                            name=layer_names[i][0])

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1])

            # Keep W around: with tied weights the decoder reuses it.
            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            output = transfer_function_enc(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)

            # the input into the next layer is the output of this layer
            next_layer_input = output

        # The fully encoded x value is now stored in the next_layer_input
        self.encoded_x = next_layer_input

        # Build the reconstruction layers by walking the encoder back to
        # front.  Reversed *copies* are used so that the caller's
        # ``layer_sizes`` list is not left reversed after construction.
        reversed_sizes = list(layer_sizes)[::-1]
        reversed_encoders = self.encoding_matrices[::-1]
        decoder_dims = reversed_sizes[1:] + [int(self.x.get_shape()[1])]

        self.decoding_matrices = []
        self.decoding_biases = []

        for i, dim in enumerate(decoder_dims):
            encoder_W = reversed_encoders[i]
            if tied_weights:
                # tied weights: look up the encoding matrix for this step and
                # transpose it
                W = tf.transpose(encoder_W)
            else:
                enc_shape = encoder_W.get_shape()
                W = tf.Variable(
                    xavier_init(enc_shape[1].value,
                                enc_shape[0].value,
                                transfer_function_dec))
            b = tf.Variable(tf.zeros([dim]))
            self.decoding_matrices.append(W)
            self.decoding_biases.append(b)

            output = transfer_function_dec(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)
            next_layer_input = output

        # Store the decoding matrices in encoder order (needed for loading
        # weights).  self.encoding_matrices was never reordered, and
        # self.decoding_biases stays in construction order as before.
        self.decoding_matrices.reverse()

        # the fully encoded and reconstructed value of x is here:
        self.reconstructed_x = next_layer_input

        # Regularisation term over the encoder parameters (weights first,
        # then biases); compute_regularization may return None.
        reg_variables = list(self.encoding_matrices) + list(self.encoding_biases)
        regterm = self.compute_regularization(reg_variables)

        # compute cost
        if self.loss_func == 'cross_entropy':
            # Clip from below so that log() never sees zero.
            clip_inf = tf.clip_by_value(self.reconstructed_x, 1e-10,
                                        float('inf'))
            clip_sup = tf.clip_by_value(1 - self.reconstructed_x, 1e-10,
                                        float('inf'))
            cost = -tf.reduce_mean(self.x * tf.log(clip_inf) +
                                   (1 - self.x) * tf.log(clip_sup))

        elif self.loss_func == 'softmax_cross_entropy':
            softmax = tf.nn.softmax(self.reconstructed_x)
            cost = -tf.reduce_mean(self.x * tf.log(softmax) +
                                   (1 - self.x) * tf.log(1 - softmax))

        else:
            # mean_squared (root of the mean squared reconstruction error)
            cost = tf.sqrt(
                tf.reduce_mean(tf.square(self.x - self.reconstructed_x)))

        self.cost = (cost + regterm) if regterm is not None else (cost)
        self.optimizer = optimizer.minimize(self.cost)

        # initialize variables
        init = tf.initialize_all_variables()
        self.sess = tf.Session()
        self.sess.run(init)

Example #6

Show file

    def __init__(self,
                 input_size,
                 n_classes,
                 layer_sizes,
                 layer_names,
                 finetune_learning_rate=0.001,
                 momentum=0.5,
                 keep_prob=1,
                 transfer_function=tf.nn.sigmoid,
                 l2reg=5e-4,
                 regtype='none',
                 loss_func='softmax_cross_entropy',
                 opt='gradient_descent',
                 dir_='dbn'):
        """Build an encoder stack with a linear classification layer on top.

        Args:
            input_size: width of the input placeholder ``self.x``.
            n_classes: width of the label placeholder ``self.y`` and of the
                output layer.
            layer_sizes: output width of each encoding layer, in order.
            layer_names: one entry per encoding layer; index 0 is used as the
                weight variable name and index 1 as the bias variable name.
            finetune_learning_rate: learning rate given to the optimizer.
            momentum: momentum used when ``opt == 'momentum'``.
            keep_prob: stored as ``self.keep_prob_value``; the graph itself
                reads the dropout rate from the ``self.keep_prob`` placeholder.
            transfer_function: activation of the encoding layers.
            l2reg: stored as ``self.l2reg``.
            regtype: stored as ``self.regtype``.
            loss_func: ``'cross_entropy'``, ``'softmax_cross_entropy'``, or
                anything else for the root-mean-squared error.
            opt: ``'gradient_descent'``, ``'ada_grad'``, ``'momentum'`` or
                ``'adam'``.
            dir_: sub-directory of ``./tf-logs/`` the summaries are written to.
        """
        self.layer_names = layer_names
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob-input')
        self.keep_prob_value = keep_prob
        self.l2reg = l2reg
        self.regtype = regtype
        self.loss_func = loss_func
        self.opt = opt
        self.momentum = momentum

        # Build the encoding layers
        self.x = tf.placeholder("float", [None, input_size], name='x-input')
        self.y = tf.placeholder("float", [None, n_classes], name='y-input')
        self.finetune_learning_rate = finetune_learning_rate
        next_layer_input = self.x

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        for i, dim in enumerate(layer_sizes):
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function),
                            name=layer_names[i][0])

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1])

            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            output = transfer_function(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)

            # the input into the next layer is the output of this layer
            next_layer_input = output

        # The fully encoded x value is now stored in the next_layer_input
        self.encoded_x = next_layer_input

        # Linear output layer on top of the encoding.
        self.last_W = tf.Variable(tf.truncated_normal(
            [self.encoded_x.get_shape()[1].value, n_classes], stddev=0.1),
                                  name='sm-weigths')
        self.last_b = tf.Variable(tf.constant(0.1, shape=[n_classes]),
                                  name='sm-biases')
        self.last_out = tf.add(tf.matmul(self.encoded_x, self.last_W),
                               self.last_b)

        # Regularisation term over the encoder parameters (weights first,
        # then biases); compute_regularization may return None.
        reg_variables = list(self.encoding_matrices) + list(self.encoding_biases)
        regterm = self.compute_regularization(reg_variables)

        # compute cost
        if self.loss_func == 'cross_entropy':
            clip_inf = tf.clip_by_value(self.last_out, 1e-10, float('inf'))
            clip_sup = tf.clip_by_value(1 - self.last_out, 1e-10, float('inf'))
            cost = -tf.reduce_mean(self.y * tf.log(clip_inf + 1e-50) +
                                   (1 - self.y) * tf.log(clip_sup + 1e-50))

        elif self.loss_func == 'softmax_cross_entropy':
            # Keyword arguments make the logits/labels roles explicit and are
            # required by TensorFlow >= 1.0, which rejects positional use.
            cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.last_out,
                                                        labels=self.y))
        else:
            # mean_squared (root of the mean squared error)
            cost = tf.sqrt(tf.reduce_mean(tf.square(self.y - self.last_out)))

        with tf.name_scope('Loss-target'):
            self.cost = (cost + regterm) if regterm is not None else (cost)

        # NOTE: an ``opt`` value outside the four handled below leaves
        # ``self.optimizer`` unset.
        with tf.name_scope('OPT-target'):
            if self.opt == 'gradient_descent':
                self.optimizer = tf.train.GradientDescentOptimizer(
                    self.finetune_learning_rate).minimize(self.cost)
            elif self.opt == 'ada_grad':
                self.optimizer = tf.train.AdagradOptimizer(
                    self.finetune_learning_rate).minimize(self.cost)
            elif self.opt == 'momentum':
                self.optimizer = tf.train.MomentumOptimizer(
                    self.finetune_learning_rate,
                    self.momentum).minimize(self.cost)
            elif self.opt == 'adam':
                self.optimizer = tf.train.AdamOptimizer(
                    self.finetune_learning_rate).minimize(self.cost)

        # Predicted class index per row and its agreement with the labels.
        self.model_predictions = tf.argmax(self.last_out, 1)
        correct_prediction = tf.equal(self.model_predictions,
                                      tf.argmax(self.y, 1))

        with tf.name_scope('Accuracy'):
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   "float"))

        self.loss_sup_summary = tf.scalar_summary("loss-sup", self.cost)
        self.accuracy_summary = tf.scalar_summary('accuracy', self.accuracy)

        # initialize variables
        init = tf.initialize_all_variables()
        self.sess = tf.Session()
        self.sess.run(init)

        logs_path = './tf-logs/' + dir_
        self.summary_writer = tf.train.SummaryWriter(
            logs_path, graph=tf.get_default_graph())

Example #7

Show file

    def __init__(self, RANDOM_INIT, ALL_WEIGHTS_TRAINABLE, input_size, layer_sizes, layer_names,
                 optimizer=tf.train.AdamOptimizer(),
                 transfer_function=tf.nn.sigmoid):
        """Build an encoder stack followed by a two-layer feed-forward classifier.

        Args:
            RANDOM_INIT: forwarded unchanged as the fourth argument of every
                ``xavier_init`` call.
            ALL_WEIGHTS_TRAINABLE: ``trainable`` flag of the encoder weights
                and biases (the feed-forward variables are always trainable).
            input_size: width of the input placeholder ``self.x``.
            layer_sizes: output width of each encoding layer, in order.
            layer_names: one entry per encoding layer; index 0 is used as the
                weight variable name and index 1 as the bias variable name.
            optimizer: optimizer whose ``minimize`` is applied to the cost.
            transfer_function: activation of the encoder layers and of the
                first feed-forward layer.
        """
        self.keep_prob = tf.placeholder(tf.float32)
        self.RANDOM_INIT = RANDOM_INIT
        self.ALL_WEIGHTS_TRAINABLE = ALL_WEIGHTS_TRAINABLE
        self.layer_names = layer_names

        # Build the encoding layers
        self.x = tf.placeholder(tf.float32, [None, input_size])
        next_layer_input = self.x

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        for i, dim in enumerate(layer_sizes):
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function, self.RANDOM_INIT), name=layer_names[i][0],
                            trainable=self.ALL_WEIGHTS_TRAINABLE)

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1], trainable=self.ALL_WEIGHTS_TRAINABLE)

            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            output = transfer_function(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)

            # the input into the next layer is the output of this layer
            next_layer_input = output

        # The fully encoded x value is now stored in the next_layer_input
        self.encoded_x = next_layer_input

        # Feed forward net
        self.ff_matrices = []
        self.ff_biases = []

        # Take the first layer's input width from the encoded tensor instead
        # of hard-coding 4, so the graph also builds when the last encoder
        # layer has a different width.
        encoded_dim = int(self.encoded_x.get_shape()[1])
        W = tf.Variable(xavier_init(encoded_dim, 50, transfer_function, self.RANDOM_INIT), name="ffw1")
        b = tf.Variable(tf.zeros([50]), name="ffb1")
        self.ff_matrices.append(W)
        self.ff_biases.append(b)
        output = transfer_function(tf.matmul(next_layer_input, W) + b)
        output = tf.nn.dropout(output, self.keep_prob)
        next_layer_input = output

        # Output layer: two classes, softmax probabilities.
        W = tf.Variable(xavier_init(50, 2, transfer_function, self.RANDOM_INIT), name="ffw2")
        b = tf.Variable(tf.zeros([2]), name="ffb2")
        self.ff_matrices.append(W)
        self.ff_biases.append(b)
        self.output = tf.nn.softmax(tf.matmul(next_layer_input, W) + b)

        self.y = tf.placeholder(tf.float32, shape=(None, 2))

        # compute cost: cross-entropy summed per row, averaged over the batch
        self.cost = tf.reduce_mean(-tf.reduce_sum(self.y * tf.log(self.output), reduction_indices=[1]))
        self.optimizer = optimizer.minimize(self.cost)

        # initialize variables
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

Example #8

Show file

    def __init__(self,
                 input_size,
                 n_classes,
                 layer_sizes,
                 layer_names,
                 tied_weights=False,
                 keep_prob=1,
                 momentum=0.5,
                 opt_unsup='gradient_descent',
                 opt_sup='ada_grad',
                 finetune_learning_rate=0.3,
                 transfer_function_enc=tf.nn.sigmoid,
                 transfer_function_dec=tf.nn.sigmoid,
                 l2reg=5e-4,
                 regtype='none',
                 loss_func_target='softmax_cross_entropy',
                 loss_func_au='mean_squared',
                 corr_frac=0,
                 corr_type='none',
                 dir_='mixDbn'):

        self.layer_names = layer_names
        self.tied_weights = tied_weights
        self.keep_prob = tf.placeholder(tf.float32)
        self.keep_prob_value = keep_prob
        self.l2reg = l2reg
        self.regtype = regtype
        self.loss_func_au = loss_func_au
        self.loss_func_target = loss_func_target
        self.finetune_learning_rate = finetune_learning_rate
        self.opt_unsup = opt_unsup
        self.opt_sup = opt_sup
        self.momentum = momentum

        self.corr_frac = corr_frac
        self.corr_type = corr_type

        # Build the encoding layers
        self.x = tf.placeholder(tf.float32, [None, input_size], name='x-input')
        self.y = tf.placeholder(tf.float32, [None, n_classes], name='y-input')
        self.x_corr = tf.placeholder(tf.float32, [None, input_size],
                                     name='x-corr-input')

        #next_layer_input = self.x
        next_layer_input = self.x_corr

        assert len(layer_sizes) == len(layer_names)

        self.encoding_matrices = []
        self.encoding_biases = []
        self.enconding_vars = []
        for i in range(len(layer_sizes)):
            dim = layer_sizes[i]
            input_dim = int(next_layer_input.get_shape()[1])

            # Initialize W using xavier initialization
            W = tf.Variable(xavier_init(input_dim, dim, transfer_function_enc),
                            name=layer_names[i][0])

            # Initialize b to zero
            b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1])

            # We are going to use tied-weights so store the W matrix for later reference.
            self.encoding_matrices.append(W)
            self.encoding_biases.append(b)

            self.enconding_vars.append(W)
            self.enconding_vars.append(b)

            output = transfer_function_enc(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)

            # the input into the next layer is the output of this layer
            next_layer_input = output

        with tf.name_scope('Model'):
            # The fully encoded x value is now stored in the next_layer_input
            self.encoded_x = next_layer_input

        # build the reconstruction layers by reversing the reductions
        layer_sizes.reverse()
        layer_names.reverse()
        self.encoding_matrices.reverse()

        self.decoding_matrices = []
        self.decoding_biases = []
        self.decoding_vars = []
        for i, dim in enumerate(layer_sizes[1:] +
                                [int(self.x.get_shape()[1])]):
            W = None
            # if we are using tied weights, so just lookup the encoding matrix for this step and transpose it
            if tied_weights:
                W = tf.transpose(self.encoding_matrices[i])
                b = tf.Variable(tf.zeros([dim]))
            else:
                #W = tf.Variable(tf.transpose(self.encoding_matrices[i].initialized_value()))
                W = tf.Variable(xavier_init(
                    self.encoding_matrices[i].get_shape()[1].value,
                    self.encoding_matrices[i].get_shape()[0].value,
                    transfer_function_dec),
                                name=layer_names[i][0] + 'd')
                b = tf.Variable(tf.zeros([dim]), name=layer_names[i][1] + 'd')
            self.decoding_matrices.append(W)
            self.decoding_biases.append(b)
            self.decoding_vars.append(W)
            self.decoding_vars.append(b)

            output = transfer_function_dec(tf.matmul(next_layer_input, W) + b)
            output = tf.nn.dropout(output, self.keep_prob)
            next_layer_input = output

        # i need to reverse the encoding matrices back for loading weights
        self.encoding_matrices.reverse()
        self.decoding_matrices.reverse()

        # the fully encoded and reconstructed value of x is here:
        self.reconstructed_x = next_layer_input

        # The fully encoded x value is now stored in the next_layer_input
        self.last_W = tf.Variable(tf.truncated_normal(
            [self.encoded_x.get_shape()[1].value, n_classes], stddev=0.1),
                                  name='sm-weigths')
        self.last_b = tf.Variable(tf.constant(0.1, shape=[n_classes]),
                                  name='sm-biases')
        with tf.name_scope('Model'):
            self.last_out = tf.add(tf.matmul(self.encoded_x, self.last_W),
                                   self.last_b)

        # compute cost
        vars = []
        vars.extend(self.encoding_matrices)
        vars.extend(self.encoding_biases)
        regterm = self.compute_regularization(vars)

        if self.loss_func_au == 'cross_entropy':
            clip_inf = tf.clip_by_value(self.reconstructed_x, 1e-10,
                                        float('inf'))
            clip_sup = tf.clip_by_value(1 - self.reconstructed_x, 1e-10,
                                        float('inf'))
            cost_au = -tf.reduce_mean(self.x * tf.log(clip_inf) +
                                      (1 - self.x) * tf.log(clip_sup))

        elif self.loss_func_au == 'softmax_cross_entropy':
            cost_au = tf.contrib.losses.softmax_cross_entropy(
                self.reconstructed_x, self.x)
        else:
            # root mean squared error (sqrt of the mean squared difference)
            cost_au = tf.sqrt(
                tf.reduce_mean(tf.square(self.x - self.reconstructed_x)))

        if self.loss_func_target == 'cross_entropy':
            clip_inf = tf.clip_by_value(self.last_out, 1e-10, float('inf'))
            clip_sup = tf.clip_by_value(1 - self.last_out, 1e-10, float('inf'))
            cost_target = -tf.reduce_mean(self.y * tf.log(clip_inf) +
                                          (1 - self.y) * tf.log(clip_sup))

        elif self.loss_func_target == 'softmax_cross_entropy':
            cost_target = tf.contrib.losses.softmax_cross_entropy(
                self.last_out, self.y)
        else:
            # root mean squared error (sqrt of the mean squared difference)
            cost_target = tf.sqrt(
                tf.reduce_mean(tf.square(self.y - self.last_out)))

        # cost = cost_target# tf.add(cost_au,cost_target)
        with tf.name_scope('Loss-au'):
            self.cost_target = (cost_target +
                                regterm) if regterm is not None else (
                                    cost_target)
        with tf.name_scope('Loss-target'):
            self.cost_au = (cost_au +
                            regterm) if regterm is not None else (cost_au)
        #_ = tf.scalar_summary(self.loss_func, self.cost)

        with tf.name_scope('OPT-au'):
            if self.opt_sup == 'gradient_descent':
                all_var = self.enconding_vars + [self.last_W, self.last_b]
                self.train_step_sup = tf.train.GradientDescentOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_target,
                                                          var_list=all_var)
            elif self.opt_sup == 'ada_grad':
                all_var = self.enconding_vars + [self.last_W, self.last_b]
                self.train_step_sup = tf.train.AdagradOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_target,
                                                          var_list=all_var)
            elif self.opt_sup == 'momentum':
                all_var = self.enconding_vars + [self.last_W, self.last_b]
                self.train_step_sup = tf.train.MomentumOptimizer(
                    self.finetune_learning_rate,
                    self.momentum).minimize(self.cost_target, var_list=all_var)
            elif self.opt_sup == 'adam':
                all_var = self.enconding_vars + [self.last_W, self.last_b]
                self.train_step_sup = tf.train.AdamOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_target,
                                                          var_list=all_var)
            else:
                self.train_step_sup = None

        with tf.name_scope('OPT-target'):
            if self.opt_unsup == 'gradient_descent':
                all_var = self.enconding_vars + self.decoding_vars
                self.train_step_unsup = tf.train.GradientDescentOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_au,
                                                          var_list=all_var)
            elif self.opt_unsup == 'ada_grad':
                all_var = self.enconding_vars + self.decoding_vars
                self.train_step_unsup = tf.train.AdagradOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_au,
                                                          var_list=all_var)
            elif self.opt_unsup == 'momentum':
                all_var = self.enconding_vars + self.decoding_vars
                self.train_step_unsup = tf.train.MomentumOptimizer(
                    self.finetune_learning_rate,
                    self.momentum).minimize(self.cost_au, var_list=all_var)
            elif self.opt_unsup == 'adam':
                all_var = self.enconding_vars + self.decoding_vars
                self.train_step_unsup = tf.train.AdamOptimizer(
                    self.finetune_learning_rate).minimize(self.cost_au,
                                                          var_list=all_var)
            else:
                self.train_step_unsup = None

        self.model_predictions = tf.argmax(self.last_out, 1)
        correct_prediction = tf.equal(self.model_predictions,
                                      tf.argmax(self.y, 1))
        with tf.name_scope('Accuracy'):
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   "float"))

        self.loss_unsup_summary = tf.scalar_summary("loss-au", self.cost_au)
        self.loss_sup_summary = tf.scalar_summary("loss-sup", self.cost_target)
        # self.merged_summary_op = tf.merge_all_summaries()

        self.accuracy_summary = tf.scalar_summary('accuracy', self.accuracy)

        #         scaledImageRec_x = tf.image.convert_image_dtype(self.reconstructed_x, dtype=tf.uint8)
        #         reconstructed_x_transposed = tf.transpose(scaledImageRec_x, [None, 28, 28, 1])
        #         self.reconstruct_summary = tf.image_summary('reconstruct', reconstructed_x_transposed ,max_image=30)

        logs_path = './tf-logs/' + dir_
        # initialize variables
        init = tf.initialize_all_variables()
        self.sess = tf.Session()
        self.sess.run(init)
        self.summary_writer = tf.train.SummaryWriter(
            logs_path, graph=tf.get_default_graph())