Example No. 1
    def __init__(self, x, y1_, y2_, learning_rate):
        """Creates a NonLinearModel.

        Inherits from Model.

        Parameters:
            x: the placeholder for the input tensor.
            y1_: the placeholder for the output 1 tensor.
            y2_: the placeholder for the output 2 tensor.
            learning_rate: the learning rate used by the optimizer.

        Returns:
            A NonLinearModel object.
        """
        model.Model.__init__(self)

        # get sizes for the input and outputs
        num_cols_in = x.get_shape().as_list()[1]
        num_states_in = x.get_shape().as_list()[2]
        num_states_out1 = y1_.get_shape().as_list()[1]
        num_cols_out2 = y2_.get_shape().as_list()[1]
        num_states_out2 = y2_.get_shape().as_list()[2]

        # the first layer flattens the data so that it can be passed through a fully connected layer
        x_flat = utilities.reshape(x, [-1, num_cols_in*num_states_in])

        hidden1 = utilities.fc_layer(x_flat, num_cols_in*num_states_in, num_cols_in*num_states_in*4, layer_name='hidden_1')
        hidden2 = utilities.fc_layer(hidden1, num_cols_in*num_states_in*4, num_cols_in*num_states_in*2, layer_name='hidden_2')

        # the dropout layer reduces overfitting
        dropped, self._keep_prob = utilities.dropout(hidden2)

        # the network splits here:
        # the first softmax layer reduces the output to a percentage chance for each of the output states
        output1 = utilities.fc_layer(dropped, 2*num_cols_in*num_states_in, num_states_out1, 'softmax_1', act=tf.nn.softmax)

        # the second softmax layer reduces the output to a percentage chance for each SNP's output states
        with tf.name_scope('softmax_2'):
            fc_layer = utilities.fc_layer(dropped, 2*num_cols_in*num_states_in, num_states_out2*num_cols_out2, layer_name='identity', act=tf.identity)
            output2 = tf.nn.softmax(utilities.reshape(fc_layer, [-1, num_cols_out2, num_states_out2], name_suffix='3'))

        # each loss layer compares the probability distribution of an output with its corresponding target to get an error metric for the network's outputs
        self._loss1 = utilities.calculate_cross_entropy(output1, y1_, name_suffix='1')
        self._loss2 = utilities.calculate_cross_entropy(output2, y2_, name_suffix='2')
        # these losses are combined into one for training
        with tf.name_scope('combined_loss'):
            combined_loss = tf.add(self._loss1, self._loss2)

        # the combined loss is used with backpropagation and gradient-descent-based Adam optimization to train the network
        self._train_step = utilities.train(learning_rate, combined_loss, training_method=utilities.Optimizer.Adam, name_suffix='1')

        # the accuracies for each output are calculated by comparing them to the correct outputs
        self._accuracy1 = utilities.calculate_epi_accuracy(output1, y1_, name_suffix='1')
        self._accuracy2 = utilities.calculate_snp_accuracy(output2, y2_, name_suffix='2')

        # find the top predicted snps
        self._epi_snps, self._count = utilities.predict_snps(output2)

        # merge all the summaries
        self._merged = tf.merge_all_summaries()
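
For context, a minimal construction sketch using the same TF 0.x API as the code above; the import path, shapes, and learning rate below are purely illustrative assumptions:

    import tensorflow as tf
    # from nonlinear_model import NonLinearModel  # hypothetical import path

    # illustrative shapes: 100 SNP columns with 3 states each, 2 epistasis
    # states for output 1, and 2 per-SNP states for output 2
    x = tf.placeholder(tf.float32, [None, 100, 3])
    y1_ = tf.placeholder(tf.float32, [None, 2])
    y2_ = tf.placeholder(tf.float32, [None, 100, 2])

    net = NonLinearModel(x, y1_, y2_, learning_rate=1e-4)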
        
Example No. 2
    def testDropoutShape(self):
        """Provides a test for checking that the dropout_layer does not change the shape of the input.

        Arguments:
            Nothing.

        Returns:
            Nothing.
        """
        input_tensor = tf.zeros([2, 3, 4, 5, 6])
        output_tensor, keep_prob = utilities.dropout(input_tensor)
        output_tensor_shape = tf.shape(output_tensor)
        with self.test_session() as sess:
            sess.run(tf.initialize_all_variables())
            self.assertAllEqual(np.array([2, 3, 4, 5, 6]), sess.run(output_tensor_shape, feed_dict={keep_prob : 0.1}))
Example No. 3
    def testDropoutOps(self):
        """Provides a test for checking that the dropout_layer function adds correctly named operations to the graph.

        Arguments:
            Nothing.

        Returns:
            Nothing.
        """
        with self.test_session() as sess:
            input_tensor = tf.Variable([2, 3, 4, 5, 6], dtype=tf.float32)
            dropped, keep_prob = utilities.dropout(input_tensor, name_suffix='1')
            sess.run(tf.initialize_all_variables())
            sess.run(dropped, feed_dict={keep_prob : 0.1})
            op_dict = {"dropout_1/Placeholder": "Placeholder", "dropout_1/dropout/mul": "Mul", "dropout_1/dropout/Floor": "Floor", "dropout_1/dropout/add": "Add", "dropout_1/dropout/Shape": "Shape"}
            tf.python.framework.test_util.assert_ops_in_graph(op_dict, tf.get_default_graph())
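
The op names asserted above ('dropout_1/Placeholder' plus the 'dropout_1/dropout/*' ops that tf.nn.dropout creates) suggest a helper along these lines; this is a hedged reconstruction consistent with the tests, not the project's actual utilities.dropout:

    import tensorflow as tf

    def dropout(input_tensor, keep_prob=None, name_suffix='1'):
        """Sketch of a dropout helper consistent with the tests above (assumption)."""
        with tf.name_scope('dropout_' + name_suffix):
            if keep_prob is None:
                # creates the 'dropout_1/Placeholder' op the op-name test looks for
                keep_prob = tf.placeholder(tf.float32)
            # tf.nn.dropout adds the 'dropout_1/dropout/*' ops (Shape, Floor, add, mul)
            # and preserves the input shape, as the shape test expects
            dropped = tf.nn.dropout(input_tensor, keep_prob)
        return dropped, keep_prob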
Example No. 4
    def __init__(self, x, y1_, y2_, learning_rate):
        """Creates a PoolConvModel.

        Inherits from Model.

        Parameters:
            x: the placeholder for the input tensor.
            y1_: the placeholder for the output 1 tensor.
            y2_: the placeholder for the output 2 tensor.
            learning_rate: the learning rate used by the optimizer.

        Returns:
            A PoolConvModel object.
        """
        model.Model.__init__(self)

        # get sizes for the input and outputs
        num_cols_in = x.get_shape().as_list()[1]
        num_states_out1 = y1_.get_shape().as_list()[1]
        num_cols_out2 = y2_.get_shape().as_list()[1]
        num_states_out2 = y2_.get_shape().as_list()[2]

        # the first layer reshapes the input into the 4-D form required by the convolution layers
        x_4d = utilities.reshape(x, [-1, num_cols_in, 3, 1], name_suffix='1')

        # the first convolution layer preserves the shape and increases the number of channels to 8
        conv1 = utilities.conv_layer(x_4d, [3, 3, 1, 8],
                                     padding='SAME',
                                     name_suffix='1')

        # the first pooling layer simply halves the data size along the SNP dimension
        pool1 = utilities.pool_layer(conv1,
                                     shape=[1, 2, 1, 1],
                                     strides=[1, 2, 1, 1],
                                     name_suffix='1')

        # the second convolution layer preserves the shape and increases the number of channels to 16
        conv2 = utilities.conv_layer(pool1, [3, 3, 8, 16],
                                     padding='SAME',
                                     name_suffix='2')

        # the second pooling layer halves the data size along the SNP dimension
        pool2 = utilities.pool_layer(conv2,
                                     shape=[1, 2, 1, 1],
                                     strides=[1, 2, 1, 1],
                                     name_suffix='2')

        # the third convolution layer reduces the states dimension to size 1 and increases the number of channels to 32
        conv3 = utilities.conv_layer(pool2, [1, 3, 16, 32],
                                     padding='VALID',
                                     name_suffix='3')

        # the third pooling layer halves the data size along the SNP dimension
        pool3 = utilities.pool_layer(conv3,
                                     shape=[1, 2, 1, 1],
                                     strides=[1, 2, 1, 1],
                                     name_suffix='3')

        # the next layer flattens the data so that it can be passed through a fully connected layer
        final_shape = pool3.get_shape()
        flatten_size = int(final_shape[1] * final_shape[2] * final_shape[3])
        flatten = utilities.reshape(pool3, [-1, flatten_size], name_suffix='2')

        # the first fully connected layer halves the data size
        hidden1 = utilities.fc_layer(flatten,
                                     flatten_size,
                                     int(flatten_size / 2),
                                     layer_name='hidden_1')

        # the second fully connected layer halves the data size again
        hidden2 = utilities.fc_layer(hidden1,
                                     int(flatten_size / 2),
                                     int(flatten_size / 4),
                                     layer_name='hidden_2')

        # the dropout layer reduces overfitting
        dropped, self._keep_prob = utilities.dropout(hidden2, name_suffix='1')

        # the network splits here:
        # the first softmax layer reduces the output to a percentage chance for each of the output states
        output1 = utilities.fc_layer(dropped,
                                     int(flatten_size / 4),
                                     num_states_out1,
                                     layer_name='softmax_1',
                                     act=tf.nn.softmax)

        # the second softmax layer reduces the output to a percentage chance for each SNP's output states
        with tf.name_scope('softmax_2'):
            fc_layer = utilities.fc_layer(dropped,
                                          int(flatten_size / 4),
                                          num_states_out2 * num_cols_out2,
                                          layer_name='identity',
                                          act=tf.identity)
            #output2 = tf.sigmoid(utilities.reshape(fc_layer, [-1, num_cols_out2, 1], name_suffix='3'))
            output2 = tf.nn.softmax(
                utilities.reshape(fc_layer,
                                  [-1, num_cols_out2, num_states_out2],
                                  name_suffix='3'))

        # each loss layer compares the probability distribution of an output with its corresponding target to get an error metric for the network's outputs
        self._loss1 = utilities.calculate_cross_entropy(output1,
                                                        y1_,
                                                        name_suffix='1')
        #self._loss2 = utilities.calculate_cross_entropy(output2, utilities.get_causing_epi_probs(y2_), name_suffix='2')
        self._loss2 = utilities.calculate_cross_entropy(output2,
                                                        y2_,
                                                        name_suffix='2')
        # these losses are combined into one for training
        with tf.name_scope('combined_loss'):
            combined_loss = tf.add(self._loss1, self._loss2)

        # the combined loss is used with backpropagation and gradient-descent-based Adam optimization to train the network
        self._train_step = utilities.train(
            learning_rate,
            combined_loss,
            training_method=utilities.Optimizer.Adam,
            name_suffix='1')

        # the accuracies for each output are calculated by comparing them to the correct outputs
        self._accuracy1 = utilities.calculate_epi_accuracy(output1,
                                                           y1_,
                                                           name_suffix='1')
        self._accuracy2 = utilities.calculate_snp_accuracy(output2,
                                                           y2_,
                                                           name_suffix='2')

        # find the top predicted snps
        self._epi_snps, self._count = utilities.predict_snps(output2)

        # merge all the summaries
        self._merged = tf.merge_all_summaries()
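
To make the flatten_size computation concrete, here is an illustrative shape walk-through for a hypothetical num_cols_in = 1000, assuming the stride-1 'SAME' convolutions and SAME-padded pooling implied by the comments above:

    # x_4d : [N, 1000, 3, 1]
    # conv1: [N, 1000, 3, 8]     pool1: [N, 500, 3, 8]
    # conv2: [N, 500, 3, 16]     pool2: [N, 250, 3, 16]
    # conv3: [N, 250, 1, 32]     pool3: [N, 125, 1, 32]   ('VALID' conv collapses the states axis)
    # flatten_size = 125 * 1 * 32 = 4000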
Example No. 5
    def __init__(self, x, y1_, y2_, learning_rate):
        """Creates a RecurrentModel.

        Inherits from Model.

        Parameters:
            x: the placeholder for the input tensor.
            y1_: the placeholder for the output 1 tensor.
            y2_: the placeholder for the output 2 tensor.
            learning_rate: the learning rate used by the optimizer.

        Returns:
            A RecurrentModel object.
        """
        model.Model.__init__(self)

        # max_length = 100
        #
        # print("x shape: %s" % x.get_shape())
        # x = utilities.reshape(x, [-1, max_length, int(x.get_shape()[2])])
        # print("x shape: %s" % x.get_shape())

        # get sizes for the input and outputs
        num_states_out1 = y1_.get_shape().as_list()[1]
        num_cols_out2 = y2_.get_shape().as_list()[1]
        num_states_out2 = y2_.get_shape().as_list()[2]

        # parameters for the RNN
        num_neurons = 10
        num_layers = 1
        self._keep_prob = tf.placeholder(tf.float32)

        # setup the RNN cell
        cell = GRUCell(num_neurons)  # Or LSTMCell(num_neurons)
        cell = DropoutWrapper(cell, output_keep_prob=self._keep_prob)
        cell = MultiRNNCell([cell] * num_layers)

        output, _ = tf.nn.dynamic_rnn(cell,
                                      x,
                                      dtype=tf.float32,
                                      swap_memory=True,
                                      parallel_iterations=1)

        print("output shape: %s" % output.get_shape())

        output = tf.transpose(output, [1, 0, 2])

        print("output shape: %s" % output.get_shape())

        last = tf.gather(output, int(output.get_shape()[0]) - 1)

        print("last shape: %s" % last.get_shape())

        hidden1 = utilities.fc_layer(last,
                                     num_neurons,
                                     num_neurons,
                                     layer_name='hidden_1')
        hidden2 = utilities.fc_layer(hidden1,
                                     num_neurons,
                                     num_neurons,
                                     layer_name='hidden_2')

        # the dropout layer reduces overfitting
        dropped, _ = utilities.dropout(hidden2, keep_prob=self._keep_prob)

        # the network splits here:
        # the first softmax layer reduces the output to a percentage chance for each of the output states
        output1 = utilities.fc_layer(dropped,
                                     num_neurons,
                                     num_states_out1,
                                     'softmax_1',
                                     act=tf.nn.softmax)

        # the second softmax layer reduces the output to a percentage chance for each SNP's output states
        with tf.name_scope('softmax_2'):
            fc_layer = utilities.fc_layer(dropped,
                                          num_neurons,
                                          num_states_out2 * num_cols_out2,
                                          layer_name='identity',
                                          act=tf.identity)
            output2 = tf.nn.softmax(
                utilities.reshape(fc_layer,
                                  [-1, num_cols_out2, num_states_out2],
                                  name_suffix='3'))

        # each loss layer compares the probability distribution of an output with its corresponding target to get an error metric for the network's outputs
        self._loss1 = utilities.calculate_cross_entropy(output1,
                                                        y1_,
                                                        name_suffix='1')
        self._loss2 = utilities.calculate_cross_entropy(output2,
                                                        y2_,
                                                        name_suffix='2')
        # these losses are combined into one for training
        with tf.name_scope('combined_loss'):
            combined_loss = tf.add(self._loss1, self._loss2)

        # the combined loss is used with backpropagation and gradient-descent-based Adam optimization to train the network
        self._train_step = utilities.train(
            learning_rate,
            combined_loss,
            training_method=utilities.Optimizer.Adam,
            name_suffix='1')

        # the accuracies for each output are calculated by comparing them to the correct outputs
        self._accuracy1 = utilities.calculate_epi_accuracy(output1,
                                                           y1_,
                                                           name_suffix='1')
        self._accuracy2 = utilities.calculate_snp_accuracy(output2,
                                                           y2_,
                                                           name_suffix='2')

        # find the top predicted snps
        self._epi_snps, self._count = utilities.predict_snps(output2)

        # merge all the summaries
        self._merged = tf.merge_all_summaries()
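
The transpose/gather pair above is the usual TF 0.x idiom for picking out the final time step of tf.nn.dynamic_rnn output; a standalone sketch with hypothetical sizes:

    import tensorflow as tf

    # hypothetical batch of 4 sequences, 7 time steps, 10 features
    outputs = tf.zeros([4, 7, 10])
    # move time to the leading axis: [time, batch, features]
    time_major = tf.transpose(outputs, [1, 0, 2])
    # index the final time step, leaving [batch, features] == [4, 10]
    last = tf.gather(time_major, int(time_major.get_shape()[0]) - 1)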
Example No. 6
    def __init__(self, x, y1_, y2_, learning_rate):
        """Creates a ScalingModel.

        Inherits from Model.

        Parameters:
            x: the placeholder for the input tensor.
            y1_: the placeholder for the output 1 tensor.
            y2_: the placeholder for the output 2 tensor.
            learning_rate: the learning rate used by the optimizer.

        Returns:
            A ScalingModel object.
        """
        model.Model.__init__(self)

        # get sizes for the input and outputs
        num_cols_in = x.get_shape().as_list()[1]
        num_states_out1 = y1_.get_shape().as_list()[1]
        num_cols_out2 = y2_.get_shape().as_list()[1]
        num_states_out2 = y2_.get_shape().as_list()[2]

        self._keep_prob = tf.placeholder(tf.float32)

        # the first layer reshapes the input into the 4-D form required by the convolution layers
        x_4d = utilities.reshape(x, [-1, num_cols_in, 3, 1], name_suffix='1')

        # the first convolution layer preserves the shape and increases the number of channels to 8
        conv1 = utilities.conv_layer(x_4d, [3, 3, 1, 8], padding='SAME', name_suffix='1')

        # the first pooling layer simply halves the data size along the SNP dimension
        pool1 = utilities.pool_layer(conv1, shape=[1, 2, 1, 1], strides=[1, 2, 1, 1], name_suffix='1')

        # the second convolution layer preserves the shape and increases the number of channels to 16
        conv2 = utilities.conv_layer(pool1, [3, 3, 8, 16], padding='SAME', name_suffix='2')

        # the second pooling layer halves the data size along the SNP dimension
        pool2 = utilities.pool_layer(conv2, shape=[1, 2, 1, 1], strides=[1, 2, 1, 1], name_suffix='2')

        # the third convolution layer reduces the states dimension to size 1 and increases the number of channels to 32
        conv3 = utilities.conv_layer(pool2, [1, 3, 16, 32], padding='VALID', name_suffix='3')

        # the third pooling layer halves the data size along the SNP dimension
        pool3 = utilities.pool_layer(conv3, shape=[1, 2, 1, 1], strides=[1, 2, 1, 1], name_suffix='3')

        # the next layer flattens the data so that it can be passed through a fully connected layer
        final_shape = pool3.get_shape()
        flatten_size = int(final_shape[1]*final_shape[2]*final_shape[3])
        flatten = utilities.reshape(pool3, [-1, flatten_size], name_suffix='2')

        # the network splits here:
        # the first branch ends in a softmax layer that reduces the output to a percentage chance for each of the output states
        hidden1 = utilities.fc_layer(flatten, flatten_size, int(flatten_size/100), layer_name='hidden_1')
        dropped1, _ = utilities.dropout(hidden1, name_suffix='1', keep_prob=self._keep_prob)
        hiddenx = utilities.fc_layer(dropped1, int(flatten_size/100), int(flatten_size/200), layer_name='hidden_x')
        droppedx, _ = utilities.dropout(hiddenx, name_suffix='x', keep_prob=self._keep_prob)
        output1 = utilities.fc_layer(droppedx, int(flatten_size/200), num_states_out1, layer_name='softmax_1', act=tf.nn.softmax)

        # the first pair of fully connected layers squeezes the data through a 100-unit linear bottleneck and out to half the flattened size
        hidden2_1 = utilities.fc_layer(flatten, flatten_size, 100, layer_name='hidden_2_1', act=tf.identity)
        hidden2_2 = utilities.fc_layer(hidden2_1, 100, int(flatten_size/2), layer_name='hidden_2_2')

        # the dropout layer reduces overfitting
        dropped2, _ = utilities.dropout(hidden2_2, name_suffix='2', keep_prob=self._keep_prob)

        # the second pair of fully connected layers squeezes through another 100-unit bottleneck and out to a quarter of the flattened size
        hidden3_1 = utilities.fc_layer(dropped2, int(flatten_size/2), 100, layer_name='hidden_3_1', act=tf.identity)
        hidden3_2 = utilities.fc_layer(hidden3_1, 100, int(flatten_size/4), layer_name='hidden_3_2')

        dropped3, _ = utilities.dropout(hidden3_2, name_suffix='3', keep_prob=self._keep_prob)

        # the second softmax layer reduces the output to a percentage chance for each SNP's output states
        with tf.name_scope('softmax_2'):
            fc_layer_1 = utilities.fc_layer(dropped3, int(flatten_size/4), 100, layer_name='identity_1', act=tf.identity)
            fc_layer_2 = utilities.fc_layer(fc_layer_1, 100, num_states_out2*num_cols_out2, layer_name='identity_2', act=tf.identity)
            output2 = tf.nn.softmax(utilities.reshape(fc_layer_2, [-1, num_cols_out2, num_states_out2], name_suffix='3'))

        # each loss layer compares the probability distribution of an output with its corresponding target to get an error metric for the network's outputs
        self._loss1 = utilities.calculate_cross_entropy(output1, y1_, name_suffix='1')
        self._loss2 = utilities.calculate_cross_entropy(output2, y2_, name_suffix='2')
        # these losses are combined into one for training
        with tf.name_scope('combined_loss'):
            combined_loss = tf.add(self._loss1, self._loss2)

        # the combined loss is used with backpropagation and gradient-descent-based Adam optimization to train the network
        self._train_step = utilities.train(learning_rate, combined_loss, training_method=utilities.Optimizer.Adam, name_suffix='1')

        # the accuracies for each output are calculated by comparing them to the correct outputs
        self._accuracy1 = utilities.calculate_epi_accuracy(output1, y1_, name_suffix='1')
        self._accuracy2 = utilities.calculate_snp_accuracy(output2, y2_, name_suffix='2')

        # find the top predicted snps
        self._epi_snps, self._count = utilities.predict_snps(output2)

        # merge all the summaries
        self._merged = tf.merge_all_summaries()
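
All of the models above end with the same hooks: a keep_prob placeholder for dropout, an Adam train step over the combined loss, and two accuracy ops. A minimal train-then-evaluate sketch under heavy assumptions (the import path, shapes, dummy data, and the direct access to the underscore-prefixed attributes are all hypothetical; the model.Model base class presumably exposes proper accessors):

    import numpy as np
    import tensorflow as tf
    # from scaling_model import ScalingModel  # hypothetical import path

    # illustrative shapes: 64 SNP columns with 3 states each, 2 output states
    x = tf.placeholder(tf.float32, [None, 64, 3])
    y1_ = tf.placeholder(tf.float32, [None, 2])
    y2_ = tf.placeholder(tf.float32, [None, 64, 2])
    net = ScalingModel(x, y1_, y2_, learning_rate=1e-4)

    feed = {x: np.zeros([8, 64, 3], np.float32),
            y1_: np.zeros([8, 2], np.float32),
            y2_: np.zeros([8, 64, 2], np.float32)}

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        feed[net._keep_prob] = 0.5   # train with dropout active
        sess.run(net._train_step, feed_dict=feed)
        feed[net._keep_prob] = 1.0   # evaluate with dropout disabled
        acc1, acc2 = sess.run([net._accuracy1, net._accuracy2], feed_dict=feed)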