Example #1
0
 def get_recurrent_cell(self, num_units):
     """Return a zero-argument factory for the configured recurrent cell.

     The cell kind is read from ``self.config.recurrent_cell_type``:

     * ``"norm_lstm"`` -> ``LayerNormBasicLSTMCell`` whose
       ``dropout_keep_prob`` is ``self.recurent_dropout``
     * ``"lstm"``      -> ``tf.nn.rnn_cell.LSTMCell``

     Args:
         num_units: number of units passed to the cell constructor.

     Returns:
         A callable taking no arguments; every call constructs a new cell.

     Raises:
         ValueError: if the configured cell type is not one of the above.
     """
     cell_type = self.config.recurrent_cell_type
     # Compare by value.  ``is`` checks object identity and only appears to
     # work when both strings happen to be the same interned object.
     if cell_type == "norm_lstm":
         # NOTE(review): attribute is spelled ``recurent_dropout`` here;
         # kept as-is because it is defined outside this block.
         return lambda: LayerNormBasicLSTMCell(
             num_units=num_units, dropout_keep_prob=self.recurent_dropout)
     if cell_type == "lstm":
         return lambda: tf.nn.rnn_cell.LSTMCell(num_units=num_units)
     # Report the attribute that was actually inspected above.
     raise ValueError("Incorrect cell_type '" + str(cell_type) + "'")
Example #2
0
 def get_cell():
   """Construct one recurrent cell for the enclosing scope's ``cell_type``.

   Reads ``cell_type``, ``keep_prob`` and ``self.hidden_size`` from the
   surrounding scope.  Supported kinds are 'gru', 'lstm' and 'layer_norm';
   any other value raises ``Exception``.
   """
   kind = cell_type
   if kind == 'gru':
     return rnn_cell.GRUCell(self.hidden_size)
   if kind == 'lstm':
     return rnn_cell.LSTMCell(self.hidden_size)
   if kind != 'layer_norm':
     raise Exception('Unknown cell type: {}'.format(kind))
   # Only the layer-normalised LSTM receives the dropout keep probability.
   return LayerNormBasicLSTMCell(self.hidden_size,
                                 dropout_keep_prob=keep_prob)
Example #3
0
def rnn_cell(dim, hparams, is_training):
    """Build the recurrent cell selected by ``hparams.rnn_type``.

    Args:
        dim: number of units passed to the cell constructor.
        hparams: object exposing ``rnn_type`` and, depending on the type,
            ``dropout_rate`` ('ln_lstm') or ``zonout_prob`` ('zn_lstm').
        is_training: when falsy, the 'ln_lstm' cell is built with a keep
            probability of 1.0; the flag is also forwarded to
            ``ZoneoutWrapper`` for 'zn_lstm'.

    Returns:
        A ``LayerNormBasicLSTMCell`` for 'ln_lstm', or an ``LSTMBlockCell``
        wrapped in ``ZoneoutWrapper`` for 'zn_lstm'.

    Raises:
        ValueError: if ``hparams.rnn_type`` is neither of the two supported
            values (previously this surfaced as an ``UnboundLocalError``).
    """
    rnn_type = hparams.rnn_type
    if rnn_type == 'ln_lstm':
        # Dropout is applied only while training.
        keep_prob = (1 - hparams.dropout_rate) if is_training else 1.0
        return LayerNormBasicLSTMCell(dim,
                                      dropout_keep_prob=keep_prob,
                                      layer_norm=True)
    if rnn_type == 'zn_lstm':
        # NOTE(review): attribute is spelled ``zonout_prob``; kept because it
        # is defined by the caller's hparams object.
        return ZoneoutWrapper(LSTMBlockCell(dim), hparams.zonout_prob,
                              is_training)
    raise ValueError(
        "Unknown rnn_type %r; expected 'ln_lstm' or 'zn_lstm'" % (rnn_type,))
Example #4
0
 def define_sequence_model(self):
     """Build the sequence model inside ``self.graph``.

     Creates placeholders for the utterance lengths and the input features,
     stacks the layers listed in ``self.hidden_layer_type`` ("tanh" dense
     layers, "lstm"/"gru" recurrent cells), runs the collected recurrent
     cells through ``tf.nn.dynamic_rnn`` and, when ``self.output_type`` is
     "linear", adds a dense output layer.  Placeholders and the output layer
     are registered in graph collections under the names used below.

     NOTE(review): this block uses Python 2 syntax (``print`` statement,
     ``xrange``).
     NOTE(review): ``output_layer`` is only assigned for the "linear" output
     type; any other value reaches ``add_to_collection`` with it unbound.
     """
     # NOTE(review): ``seed`` is not read again in the visible code; the
     # literal is repeated in the call below.
     seed=12345
     np.random.seed(12345)
     # layer_list[-1] is always the most recently built layer.
     layer_list=[]
     with self.graph.as_default() as g:
         # ``(None)`` is plain ``None`` here, not a 1-tuple.
         utt_length=tf.placeholder(tf.int32,shape=(None))
         g.add_to_collection(name="utt_length",value=utt_length)
         with tf.name_scope("input"):
              input_layer=tf.placeholder(dtype=tf.float32,shape=(None,None,self.n_in),name="input_layer")
              if self.dropout_rate!=0.0:
                 print "Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate
                 # Boolean placeholder that switches dropout on and off.
                 is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop")
                 input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",value=is_training_drop)
              else:
                 layer_list.append(input_layer)
         # With dropout enabled this registers the dropped-out input, not the
         # raw placeholder.
         g.add_to_collection("input_layer",layer_list[0])
         with tf.name_scope("hidden_layer"):
            # Recurrent cells are collected here and stacked after the loop.
            basic_cell=[]
            if "tanh" in self.hidden_layer_type:
                # Batch-norm parameters are only needed by the "tanh" layers.
                is_training_batch=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_batch")
                bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
                g.add_to_collection("is_training_batch",is_training_batch)
            for i in xrange(len(self.hidden_layer_type)):
                if self.dropout_rate!=0.0:
                    # Dropout variant: dense layers get a dropout op, and
                    # recurrent cells are wrapped in MyDropoutWrapper.
                    if self.hidden_layer_type[i]=="tanh":
                        new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                        new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop)
                        layer_list.append(new_layer_drop)
                    if self.hidden_layer_type[i]=="lstm":
                        basic_cell.append(MyDropoutWrapper(BasicLSTMCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                    if self.hidden_layer_type[i]=="gru":
                        basic_cell.append(MyDropoutWrapper(GRUCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                else:
                    # No-dropout variant: recurrent cells are the
                    # layer-normalised implementations.
                    if self.hidden_layer_type[i]=="tanh":
                       new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                       layer_list.append(new_layer)
                    if self.hidden_layer_type[i]=="lstm":
                       basic_cell.append(LayerNormBasicLSTMCell(num_units=self.hidden_layer_size[i]))
                    if self.hidden_layer_type[i]=="gru":
                       basic_cell.append(LayerNormGRUCell(num_units=self.hidden_layer_size[i]))
            # All recurrent cells run as one stack on top of the last
            # non-recurrent layer, regardless of where they appeared in
            # hidden_layer_type.
            multi_cell=MultiRNNCell(basic_cell)
            rnn_outputs,rnn_states=tf.nn.dynamic_rnn(multi_cell,layer_list[-1],dtype=tf.float32,sequence_length=utt_length)
            layer_list.append(rnn_outputs)
         with tf.name_scope("output_layer"):
              if self.output_type=="linear" :
                  output_layer=tf.layers.dense(rnn_outputs,self.n_out)
               #  stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out])
               #  stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out)
               #  output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out])
              g.add_to_collection(name="output_layer",value=output_layer)
         with tf.name_scope("training_op"):
              if self.optimizer=="adam":
                  # Stores the optimizer object itself; no minimize() call is
                  # made in the visible code.
                  self.training_op=tf.train.AdamOptimizer()
        def make_cell(num_units, residual):
            """Create one recurrent cell, optionally with a residual wrapper.

            The cell kind comes from the enclosing object: a GRU when
            ``self.rnn_type`` is 'gru'; otherwise a layer-normalised LSTM if
            ``self.layer_norm`` is truthy, else an ``LSTMBlockCell``.  The
            chosen kind is printed.

            Args:
                num_units: number of units passed to the cell constructor.
                residual: when truthy, wrap the cell in ``ResidualWrapper``.
            """
            if self.rnn_type == 'gru':
                print("GRU")
                base = GRUCell(num_units)
            elif self.layer_norm:
                print("LSTM With layer norm")
                base = LayerNormBasicLSTMCell(num_units, layer_norm=True)
            else:
                print("LSTM Without layer norm")
                base = LSTMBlockCell(num_units)

            return ResidualWrapper(base) if residual else base
Example #6
0
    def define_sequence_model(self):
        """Build the sequential model inside ``self.graph``.

        Creates placeholders for the utterance lengths and the input
        features, a non-trainable global step with an exponentially decayed
        learning rate, the layers listed in ``self.hidden_layer_type``
        ("tanh" and "selu" dense layers, "lstm"/"gru" layer-normalised
        recurrent cells), a ``tf.nn.dynamic_rnn`` over the stacked recurrent
        cells and a dense output layer.  The tensors are registered in graph
        collections under the names used below.

        Raises:
            ValueError: if ``self.output_type`` is not "linear".  Previously
                this surfaced as a ``NameError`` on ``output_layer`` after
                most of the graph had already been built.
        """
        # Fail before touching the graph rather than after building most of it.
        if self.output_type != "linear":
            raise ValueError(
                "Unsupported output_type %r: only \"linear\" is implemented"
                % (self.output_type,))

        # layer_list[-1] is always the most recently built layer.
        layer_list = []
        with self.graph.as_default() as g:
            # the utterance lengths of this sequential data
            # (``(None)`` is plain ``None``, not a 1-tuple)
            utt_length = tf.placeholder(tf.int32, shape=(None))
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(self.initial_learning_rate,
                                                       global_step=global_step,
                                                       decay_steps=50000, decay_rate=0.99)

            g.add_to_collection(name="utt_length", value=utt_length)
            g.add_to_collection(name="global_step", value=global_step)
            g.add_to_collection(name="learning_rate", value=learning_rate)
            with tf.name_scope("input"):
                input_layer = tf.placeholder(dtype=tf.float32, shape=(None, None, self.n_in), name="input_layer")
                layer_list.append(input_layer)
            g.add_to_collection("input_layer", layer_list[0])
            with tf.name_scope("hidden_layer"):
                # Recurrent cells are collected here and stacked after the loop.
                basic_cell = []
                if "tanh" in self.hidden_layer_type:
                    # Batch-norm parameters are only needed by "tanh" layers.
                    is_training_batch = tf.placeholder(dtype=tf.bool, shape=(), name="is_training_batch")
                    bn_params = {"is_training": is_training_batch, "decay": 0.99, "updates_collections": None}
                    g.add_to_collection("is_training_batch", is_training_batch)
                for i, layer_type in enumerate(self.hidden_layer_type):
                    if layer_type == "tanh":
                        new_layer = fully_connected(layer_list[-1], self.hidden_layer_size[i], activation_fn=tf.nn.tanh, normalizer_fn=batch_norm, normalizer_params=bn_params)
                        layer_list.append(new_layer)
                    elif layer_type == "selu":
                        new_layer = fully_connected(layer_list[-1], self.hidden_layer_size[i], activation_fn=tf.nn.selu)
                        layer_list.append(new_layer)
                    elif layer_type == "lstm":
                        basic_cell.append(LayerNormBasicLSTMCell(num_units=self.hidden_layer_size[i]))
                    elif layer_type == "gru":
                        basic_cell.append(LayerNormGRUCell(num_units=self.hidden_layer_size[i]))
                # All recurrent cells run as one stack on top of the last
                # dense layer, regardless of their position in the list.
                multi_cell = MultiRNNCell(basic_cell)
                rnn_outputs, rnn_states = tf.nn.dynamic_rnn(multi_cell, layer_list[-1], dtype=tf.float32, sequence_length=utt_length)
                layer_list.append(rnn_outputs)
            with tf.name_scope("output_layer"):
                output_layer = tf.layers.dense(rnn_outputs, self.n_out)
                g.add_to_collection(name="output_layer", value=output_layer)
Example #7
0
def get_cell(cell_type, size, layers=1, direction='unidirectional'):
    """Return a recurrent cell/layer instance for ``cell_type``.

    Args:
        cell_type: one of "layer_norm_basic", "lstm_block_fused",
            "cudnn_lstm", "cudnn_gru", "lstm_block", "gru_block", "rnn" or
            "cudnn_rnn".  Any other value yields a ``BasicLSTMCell``.
        size: number of units.
        layers: layer count, used only by the Cudnn variants.
        direction: forwarded to ``CudnnLSTM`` and ``CudnnGRU``.

    NOTE(review): ``direction`` is not forwarded for "cudnn_rnn" — confirm
    whether that is intentional.
    """
    if cell_type == "layer_norm_basic":
        return LayerNormBasicLSTMCell(size)
    if cell_type == "lstm_block_fused":
        return tf.contrib.rnn.LSTMBlockFusedCell(size)
    if cell_type == "cudnn_lstm":
        return CudnnLSTM(layers, size, direction=direction)
    if cell_type == "cudnn_gru":
        return CudnnGRU(layers, size, direction=direction)
    if cell_type == "lstm_block":
        return LSTMBlockCell(size)
    if cell_type == "gru_block":
        return GRUBlockCell(size)
    if cell_type == "rnn":
        return BasicRNNCell(size)
    if cell_type == "cudnn_rnn":
        return CudnnRNNTanh(layers, size)
    # Unrecognised names fall back to a plain LSTM cell.
    return BasicLSTMCell(size)
Example #8
0
def single_rnn_cell(unit_type, num_units, dropout, residual_connection=False, residual_fn=None):
    """Build one RNN cell, optionally with residual and dropout wrappers.

    Args:
        unit_type: "lstm", "gru", "layer_norm_lstm" or "nas".
        num_units: number of units passed to the cell constructor.
        dropout: drop probability; input dropout is added when it is > 0.
        residual_connection: wrap the cell in ``ResidualWrapper`` when truthy.
        residual_fn: forwarded to ``ResidualWrapper``.

    Raises:
        ValueError: for any other ``unit_type``.
    """

    def _base_cell():
        # Pick the bare cell implementation for the requested unit type.
        if unit_type == "lstm":
            return BasicLSTMCell(num_units)
        if unit_type == "gru":
            return GRUCell(num_units)
        if unit_type == "layer_norm_lstm":
            return LayerNormBasicLSTMCell(num_units, layer_norm=True)
        if unit_type == "nas":
            return NASCell(num_units)
        raise ValueError("Unknown unit type %s!" % unit_type)

    cell = _base_cell()

    # The residual wrapper goes on first, so dropout is the outermost wrapper.
    if residual_connection:
        cell = ResidualWrapper(cell, residual_fn=residual_fn)
    if dropout > 0.0:
        keep_prob = 1 - dropout
        cell = DropoutWrapper(cell=cell, input_keep_prob=keep_prob)
    return cell
Example #9
0
def get_rnn_cell_list(config, name, reuse=False, seed=123, dtype=tf.float32):
    """Build one recurrent cell per entry of ``config['num_units']``.

    Args:
        config: mapping with keys ``'num_units'`` (iterable of unit counts),
            ``'cell_type'`` (``'clstm'`` or ``'tflstm'``), ``'layer_norm'``
            and ``'activation'``.
        name: prefix for the ``'clstm'`` cell names (``'<name>_<index>'``).
        reuse: forwarded to the cell constructors that accept it.
        seed: forwarded to ``CustomLSTMCell`` only.
        dtype: forwarded to ``CustomLSTMCell`` only.

    Returns:
        A list of cell instances in the order of ``config['num_units']``.

    Raises:
        ValueError: if ``config['cell_type']`` is not supported.  Previously
            ``None`` was silently appended to the result for every layer.
    """
    cell_list = []
    for i, units in enumerate(config['num_units']):
        cell_type = config['cell_type']
        if cell_type == 'clstm':
            cell = CustomLSTMCell(units, layer_norm=config['layer_norm'], activation=config['activation'], seed=seed,
                                  reuse=reuse, dtype=dtype, name='{}_{}'.format(name, i))
        elif cell_type == 'tflstm':
            act = get_activation(config['activation'])

            if config['layer_norm']:
                cell = LayerNormBasicLSTMCell(num_units=units, activation=act, layer_norm=config['layer_norm'],
                                              reuse=reuse)
            # The explicit ``== False`` is kept on purpose: a falsy value
            # that is not equal to False (e.g. None) must still take the
            # LSTMBlockCell branch, as before.
            elif config['layer_norm'] == False and config['activation'] != 'tanh':
                cell = LSTMCell(num_units=units, activation=act, reuse=reuse)
            else:
                # NOTE(review): this branch ignores ``act`` and ``reuse``.
                cell = LSTMBlockCell(num_units=units)
        else:
            raise ValueError(
                "Unknown cell_type %r; expected 'clstm' or 'tflstm'" % (cell_type,))
        cell_list.append(cell)

    return cell_list
Example #10
0
def single_cell(num_units,
                is_train,
                cell_type,
                dropout=0.0,
                forget_bias=0.0,
                dim_project=None):
    """Build one RNN cell with optional output projection and dropout.

    Args:
        num_units: number of units passed to the cell constructor.
        is_train: when falsy, dropout is forced to 0.
        cell_type: "lstm", "cudnn_lstm", "gru", "LSTMBlockCell" or
            "layer_norm_lstm".
        dropout: drop probability (1 - keep_prob).
        forget_bias: forwarded to the "LSTMBlockCell" and "layer_norm_lstm"
            cells; the "lstm" branch uses a fixed value of 1.0.
        dim_project: when truthy, the cell is wrapped in
            ``OutputProjectionWrapper`` with this output size (the "lstm"
            branch additionally passes it as ``num_proj``).

    Raises:
        ValueError: for any other ``cell_type``.
    """
    # dropout is disabled during eval and inference
    if not is_train:
        dropout = 0.0

    if cell_type == "lstm":
        cell = tf.contrib.rnn.LSTMCell(num_units,
                                       use_peepholes=True,
                                       num_proj=dim_project,
                                       cell_clip=50.0,
                                       forget_bias=1.0)
    elif cell_type == "cudnn_lstm":
        cell = tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
    elif cell_type == "gru":
        cell = GRUCell(num_units)
    elif cell_type == "LSTMBlockCell":
        cell = tf.contrib.rnn.LSTMBlockCell(num_units, forget_bias=forget_bias)
    elif cell_type == "layer_norm_lstm":
        cell = LayerNormBasicLSTMCell(num_units,
                                      forget_bias=forget_bias,
                                      layer_norm=True)
    else:
        raise ValueError("Unknown unit type %s!" % cell_type)

    # Projection is applied before dropout, so dropout is the outermost wrapper.
    if dim_project:
        cell = OutputProjectionWrapper(cell=cell, output_size=dim_project)

    if dropout > 0.0:
        keep_prob = 1.0 - dropout
        cell = DropoutWrapper(cell=cell, input_keep_prob=keep_prob)

    return cell
Example #11
0
def main(_):
    """Train a single-cell RNN classifier and save a checkpoint.

    Builds a graph with the cell selected by ``FLAGS.rnn_type``, runs
    ``num_iterations`` training batches drawn from ``dp.train``, evaluates on
    a ``dp.test`` batch every 100 iterations, writes summaries under
    ``log/<rnn_type>`` and saves the session to ``bin/train.ckpt``.

    Args:
        _: unused positional argument.

    Raises:
        Exception: if ``FLAGS.rnn_type`` names no known cell.
    """
    # Fix both NumPy and TensorFlow seeds.
    np.random.seed(1)
    tf.set_random_seed(1)
    num_features = dp.train.num_features
    max_steps = dp.train.max_length
    num_cells = 250
    num_classes = dp.train.num_classes
    initialization_factor = 1.0
    num_iterations = 500
    batch_size = 100
    learning_rate = 0.001
    current_step = 0
    # Uniform initializer with limit sqrt(6 / (num_cells + num_classes)),
    # used as the default for variables created in the "train" scope.
    initializer = tf.random_uniform_initializer(
        minval=-np.sqrt(6.0 * 1.0 / (num_cells + num_classes)),
        maxval=np.sqrt(6.0 * 1.0 / (num_cells + num_classes)))

    with tf.variable_scope("train", initializer=initializer):
        # NOTE(review): ``s`` is not referenced again in this function; the
        # initial state below comes from cell.zero_state.
        s = tf.Variable(
            tf.random_normal(
                [num_cells], stddev=np.sqrt(
                    initialization_factor)))  # Determines initial state
        x = tf.placeholder(tf.float32,
                           [batch_size, max_steps, num_features])  # Features
        y = tf.placeholder(tf.float32, [batch_size])  # Labels
        # Passed to dynamic_rnn as the per-example sequence length.
        l = tf.placeholder(tf.int32, [batch_size])
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Select the recurrent cell; the *_LN variants enable normalization.
        if FLAGS.rnn_type == "RWA":
            cell = RWACell(num_cells)
        elif FLAGS.rnn_type == "RWA_LN":
            cell = RWACell(num_cells, normalize=True)
        elif FLAGS.rnn_type == "RDA":
            cell = RDACell(num_cells)
        elif FLAGS.rnn_type == "RDA_LN":
            cell = RDACell(num_cells, normalize=True)
        elif FLAGS.rnn_type == "RAN":
            cell = RANCell(num_cells)
        elif FLAGS.rnn_type == "RAN_LN":
            cell = RANCell(num_cells, normalize=True)
        elif FLAGS.rnn_type == "LSTM":
            cell = BasicLSTMCell(num_cells)
        elif FLAGS.rnn_type == "LSTM_LN":
            cell = LayerNormBasicLSTMCell(num_cells)
        elif FLAGS.rnn_type == "GRU":
            cell = GRUCell(num_cells)
        else:
            raise Exception('No specified cell')

        states = cell.zero_state(batch_size, tf.float32)

        outputs, states = tf.nn.dynamic_rnn(cell, x, l, states)

        # Output projection applied to the final state.
        W_o = tf.Variable(
            tf.random_uniform([num_cells, num_classes],
                              minval=-np.sqrt(6.0 * initialization_factor /
                                              (num_cells + num_classes)),
                              maxval=np.sqrt(6.0 * initialization_factor /
                                             (num_cells + num_classes))))
        b_o = tf.Variable(tf.zeros([num_classes]))

        # The GRU state is used directly; every other cell is expected to
        # return a state object exposing ``.h``.
        if FLAGS.rnn_type == "GRU":
            ly = tf.matmul(states, W_o) + b_o
        else:
            ly = tf.matmul(states.h, W_o) + b_o
        # NOTE(review): reshaping to [batch_size] presumes num_classes == 1 —
        # confirm against the data provider.
        ly_flat = tf.reshape(ly, [batch_size])
        py = tf.nn.sigmoid(ly_flat)

    ##########################################################################################
    # Optimizer/Analyzer
    ##########################################################################################

    # Cost function and optimizer
    #
    cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=ly_flat, labels=y))  # Cross-entropy cost function
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        cost, global_step=global_step)

    # Evaluate performance
    #
    correct = tf.equal(tf.round(py), tf.round(y))
    accuracy = 100.0 * tf.reduce_mean(tf.cast(correct, tf.float32))

    tf.summary.scalar('cost', cost)
    tf.summary.scalar('accuracy', accuracy)

    ##########################################################################################
    # Train
    ##########################################################################################

    # Operation to initialize session
    # (rebinds ``initializer``, which held the uniform initializer above)
    initializer = tf.global_variables_initializer()
    summaries = tf.summary.merge_all()

    # Open session
    #
    with tf.Session() as session:
        # Summary writer
        #
        summary_writer = tf.summary.FileWriter('log/' + FLAGS.rnn_type,
                                               session.graph)

        # Initialize variables
        #
        session.run(initializer)

        # Each training session represents one batch
        #
        for iteration in range(num_iterations):
            # Grab a batch of training data
            #
            xs, ls, ys = dp.train.batch(batch_size)
            feed = {x: xs, l: ls, y: ys}

            # Update parameters
            out = session.run(
                (cost, accuracy, optimizer, summaries, global_step),
                feed_dict=feed)
            # The cost is divided by log(2) before printing (presumably to
            # report it in bits rather than nats).
            print('Iteration:', iteration, 'Dataset:', 'train', 'Cost:',
                  out[0] / np.log(2.0), 'Accuracy:', out[1])

            # ``current_step`` still holds the value read at the end of the
            # previous iteration (0 on the first pass).
            summary_writer.add_summary(out[3], current_step)

            # Periodically run model on test data
            if iteration % 100 == 0:
                # Grab a batch of test data
                #
                xs, ls, ys = dp.test.batch(batch_size)
                feed = {x: xs, l: ls, y: ys}

                # Run model
                # (the optimizer op is not run here, so no parameters change)
                summary_writer.flush()
                out = session.run((cost, accuracy), feed_dict=feed)
                test_cost = out[0] / np.log(2.0)
                test_accuracy = out[1]
                print('Iteration:', iteration, 'Dataset:', 'test', 'Cost:',
                      test_cost, 'Accuracy:', test_accuracy)

            current_step = tf.train.global_step(session, global_step)

        summary_writer.close()

        # Save the trained model
        os.makedirs('bin', exist_ok=True)
        saver = tf.train.Saver()
        saver.save(session, 'bin/train.ckpt')
Example #12
0
    def model(self,inputs,targets,en_len_sequence,zh_len_sequence):
        """Build the encoder/decoder graph with attention, its loss and train op.

        Args:
            inputs: indices looked up in the English embedding matrix.
            targets: indices looked up in the Chinese embedding matrix; also
                used as the targets of the sequence loss.
            en_len_sequence: sequence lengths passed to the encoder and to
                the attention mechanism.
            zh_len_sequence: sequence lengths passed to the training helper
                and used to build the loss mask.

        Returns:
            Depends on the flags:
            * ``FLAGS.is_inference``: ``(sample_id, [inputs, en_len_sequence,
              start_tokens, end_token])``
            * ``FLAGS.is_train``: ``{'loss': ..., 'train_op': ...}``
            * otherwise: ``[global_step, losses]``

        The loss and the gradient ops are built before the flags are checked,
        i.e. in all three modes.
        """
        # global step
        with tf.device(self.cpu_device):
            global_step = tf.contrib.framework.get_or_create_global_step()

            # Token id 0 serves as both the start token and the end token.
            start_tokens = tf.tile([0],[self.batch_size])
            end_token = tf.convert_to_tensor(0)

            # NOTE(review): the English matrix variable is named
            # 'embedding_matrix' (no 'en_' prefix), unlike its Chinese
            # counterpart.
            en_embedding_matrix = tf.get_variable(name='embedding_matrix',
                                                  shape=(FLAGS.en_vocab_size, FLAGS.en_embedded_size),
                                                  dtype=tf.float32,
                                                  # regularizer=tf.nn.l2_loss,
                                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01)
                                                  )
            zh_embedding_matrix = tf.get_variable(name='zh_embedding_matrix',
                                                  shape=(FLAGS.zh_vocab_size, FLAGS.zh_embedded_size),
                                                  dtype=tf.float32,
                                                  # regularizer=tf.nn.l2_loss,
                                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))

            # The L2 norms of both embedding matrices go into the LOSSES
            # collection and are summed into ``losses`` further down.
            tf.add_to_collection(tf.GraphKeys.LOSSES, tf.nn.l2_loss(en_embedding_matrix))
            tf.add_to_collection(tf.GraphKeys.LOSSES, tf.nn.l2_loss(zh_embedding_matrix))

            tf.summary.histogram('zh_embedding_matrix', zh_embedding_matrix)  # should this be used?
            tf.summary.histogram('en_embedding_matrix', en_embedding_matrix)

            en_embedded = tf.nn.embedding_lookup(en_embedding_matrix, inputs)
            zh_embedded = tf.nn.embedding_lookup(zh_embedding_matrix, targets)

        # inference
        with tf.name_scope('encoder'):
            # One layer-normalised LSTM cell per configured layer; layer i is
            # wrapped with self.devices[i].
            cells_fw = [DeviceWrapper(LayerNormBasicLSTMCell(num), self.devices[i]) for i,num in enumerate(config.encoder_fw_units)]
            cells_bw = [DeviceWrapper(LayerNormBasicLSTMCell(num), self.devices[i]) for i,num in enumerate(config.encoder_bw_units)]

            # outputs with shape [batch_size,max_len,output_size]
            # states_fw and states_bw is a list with length len(cells_fw)
            # [LSTMStateTuple_1,...,LSTMStateTuple_n]
            # LSTMStateTuple has attribute of c and h
            outputs, states_fw,states_bw = \
                stack_bidirectional_dynamic_rnn(cells_fw,
                                                cells_bw,
                                                en_embedded,
                                                dtype = tf.float32,
                                                sequence_length = en_len_sequence)

            # concatenate the fw and bw states layer by layer to form the
            # decoder's initial_state
            states = [LSTMStateTuple(c=tf.concat([states_fw[i].c,states_bw[i].c],1),
                                     h=tf.concat([states_fw[i].h,states_bw[i].h],1))
                      for i in range(len(states_fw))]
            # NOTE(review): ``states`` is a list of LSTMStateTuple, not a
            # single tensor — confirm the histogram summary accepts it.
            tf.summary.histogram('encoder_state', states)


        with tf.name_scope('decoder'):
            # compute attention using the decoder's output
            # (the memory passed in is ``outputs`` from the encoder above)
            attention_m = BahdanauAttention(FLAGS.attention_size,
                                            outputs,
                                            en_len_sequence)


            # use layer normalization and dropout
            # (all decoder cells are wrapped with the last device)
            cells_out = [DeviceWrapper(LayerNormBasicLSTMCell(num,
                                                              dropout_keep_prob=FLAGS.dropout_keep_prob),
                                       self.devices[-1]) for num in config.decoder_units]
            # attention wrapper
            cells_attention = [AttentionWrapper(cells_out[i],attention_m) for i in range(len(config.decoder_units))]

            # stack wrapper
            cells = MultiRNNCell(cells_attention)

            initial_cell_states = cells.zero_state(dtype=tf.float32,batch_size=self.batch_size)

            # Replace the zero cell state of each attention wrapper with the
            # corresponding encoder state; this iterates over ``states``, so
            # it presumes as many decoder layers as encoder layers.
            initial_states = tuple(initial_cell_states[i].clone(cell_state=states[i]) for i in range(len(states)))

            # # beam search
            # decoder = BeamSearchDecoder(cells,zh_embedding_matrix,start_tokens,end_token,initial_state=initial_states,beam_width=12)
            # beam search has a problem here; it may need to be implemented by ourselves.

            # basic_decoder_helper

            if FLAGS.is_inference:
                helper = GreedyEmbeddingHelper(zh_embedding_matrix, start_tokens, end_token)
            else:
                helper = TrainingHelper(zh_embedded,zh_len_sequence)
            # Projects decoder outputs to the Chinese vocabulary size.
            dense = Dense(FLAGS.zh_vocab_size, use_bias=False)

            # basic decoder
            decoder = BasicDecoder(cells, helper, initial_states, dense)  # the cell state is initialized here

            # dynamic decode
            logits, final_states, final_sequence_lengths = dynamic_decode(decoder)

            # loss
            max_zh_len = tf.reduce_max(zh_len_sequence)
            # Mask built from zh_len_sequence and passed as the loss weights.
            weights = tf.sequence_mask(zh_len_sequence, max_zh_len, dtype=tf.float32)
            inference_losses = tf.contrib.seq2seq.sequence_loss(logits.rnn_output, targets, weights)
            tf.summary.scalar('inference_loss', inference_losses)
            tf.add_to_collection(tf.GraphKeys.LOSSES, inference_losses)
            # Total loss = sequence loss + the two embedding L2 terms (plus
            # anything else already in the LOSSES collection).
            losses = tf.add_n(tf.get_collection(tf.GraphKeys.LOSSES))
            tf.summary.scalar('losses', losses)


            # train detail
            learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                       global_step,
                                                       FLAGS.decay_step,
                                                       FLAGS.decay_rate)
            tf.summary.scalar('learning_rate', learning_rate)

            opt = tf.train.GradientDescentOptimizer(learning_rate)

            # using clipped gradient
            grads_and_vars = opt.compute_gradients(losses)
            clipped_grads_and_vars = tf.contrib.training.clip_gradient_norms(grads_and_vars, FLAGS.max_gradient)
            apply_grads_op = opt.apply_gradients(clipped_grads_and_vars, global_step)

            if FLAGS.is_inference:
                return logits.sample_id, [inputs, en_len_sequence, start_tokens, end_token]
            elif FLAGS.is_train:
                return {'loss': losses, 'train_op': apply_grads_op}
            else:
                return [global_step, losses]
0
    def build(self):
        """Assemble the recurrent regressor and its adversarial classifier head.

        Sets ``predictions``, ``reg_loss``, ``class_preds``, ``acc``,
        ``acc_op``, ``classifier_loss``, ``train_regressor_op``,
        ``train_classifier_op_all`` and ``mae`` on the instance.
        """
        with tf.name_scope('recurrent_layers'):
            # One layer-normalised LSTM per entry in self.hiddens, each
            # wrapped with output dropout.
            stacked_cells = []
            for units in self.hiddens:
                ln_cell = LayerNormBasicLSTMCell(
                    units, dropout_keep_prob=self.keep_prob)
                stacked_cells.append(
                    DropoutWrapper(ln_cell, output_keep_prob=self.keep_prob))

            stack = MultiRNNCell(stacked_cells)
            rnn_outputs, _ = tf.nn.dynamic_rnn(stack, self.data,
                                               dtype=tf.float32)

            last_output = self._last_relevant(rnn_outputs, self.length,
                                              self.hiddens[-1])

        features = tf.nn.dropout(last_output, keep_prob=self.keep_prob)

        # Regression head: fc1 -> layer norm -> dropout -> final.
        hidden = slim.fully_connected(features, 256,
                                      activation_fn=tf.nn.relu, scope='fc1')
        hidden = tf.contrib.layers.layer_norm(hidden)
        hidden = tf.nn.dropout(hidden, keep_prob=self.keep_prob)

        self.predictions = slim.fully_connected(hidden, self.pred_length,
                                                activation_fn=None,
                                                scope='final')

        self.reg_loss = tf.losses.mean_squared_error(self.target,
                                                     self.predictions)

        # Gradient reversal: the sum's forward value equals ``features``,
        # while gradients flow only through the -lambda scaled term.
        scaled_neg = tf.scalar_mul(-self._lambda, features)
        reversed_features = scaled_neg + tf.stop_gradient(features -
                                                          scaled_neg)

        # Classifier head fed by the reversed features.
        hidden_ad = slim.fully_connected(reversed_features, 256,
                                         activation_fn=tf.nn.relu,
                                         scope='fc1_ad')
        hidden_ad = tf.contrib.layers.layer_norm(hidden_ad)

        self.class_preds = slim.fully_connected(hidden_ad, self.classes_num,
                                                activation_fn=None)
        self.acc, self.acc_op = tf.metrics.accuracy(
            labels=tf.argmax(self.class_target, 1),
            predictions=tf.argmax(self.class_preds, 1))

        self.classifier_loss = tf.losses.softmax_cross_entropy(
            self.class_target, self.class_preds)

        # Both train ops wait for whatever is in the UPDATE_OPS collection.
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        with tf.control_dependencies(pending_updates):
            regressor_opt = tf.train.AdamOptimizer(learning_rate=self.lr)
            self.train_regressor_op = regressor_opt.minimize(self.reg_loss)
            classifier_opt = tf.train.AdamOptimizer(learning_rate=self.lr)
            self.train_classifier_op_all = classifier_opt.minimize(
                self.classifier_loss)

        # Mean absolute error between targets and predictions.
        abs_error = tf.abs(tf.subtract(self.target, self.predictions))
        self.mae = tf.reduce_mean(abs_error)
Example #14
0
    def fit(self,
            data,
            epochs=1000,
            max_seconds=600,
            activation=tf.nn.elu,
            batch_norm_decay=0.9,
            learning_rate=1e-5,
            batch_sz=1024,
            adapt_lr=False,
            print_progress=True,
            show_fig=True):

        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # static features
        X = data['X_train_static_mins']
        N, D = X.shape
        self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

        # timeseries features
        X_time = data['X_train_time_0']
        T1, N1, D1 = X_time.shape
        assert N == N1
        self.X_time = tf.placeholder(tf.float32,
                                     shape=(T1, None, D1),
                                     name='X_time')
        self.train = tf.placeholder(tf.bool, shape=(), name='train')
        self.rnn_keep_p_encode = tf.placeholder(tf.float32,
                                                shape=(),
                                                name='rnn_keep_p_encode')
        self.rnn_keep_p_decode = tf.placeholder(tf.float32,
                                                shape=(),
                                                name='rnn_keep_p_decode')
        adp_learning_rate = tf.placeholder(tf.float32,
                                           shape=(),
                                           name='adp_learning_rate')

        he_init = variance_scaling_initializer()
        bn_params = {
            'is_training': self.train,
            'decay': batch_norm_decay,
            'updates_collections': None
        }
        latent_size = self.encoder_layer_sizes[-1]

        inputs = self.X
        with tf.variable_scope('static_encoder'):
            for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1],
                                          self.encoder_dropout[:-1]):
                inputs = dropout(inputs, keep_p, is_training=self.train)
                inputs = fully_connected(inputs,
                                         layer_size,
                                         weights_initializer=he_init,
                                         activation_fn=activation,
                                         normalizer_fn=batch_norm,
                                         normalizer_params=bn_params)

        if self.rnn_encoder_layer_sizes:
            with tf.variable_scope('rnn_encoder'):
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_encoder_dropout)
                    for s in self.rnn_encoder_layer_sizes
                ])
                time_inputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                        self.X_time,
                                                        swap_memory=True,
                                                        time_major=True,
                                                        dtype=tf.float32)
                time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
                time_inputs = tf.reshape(
                    time_inputs,
                    shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))

            inputs = tf.concat([inputs, time_inputs], axis=1)

        with tf.variable_scope('latent_space'):
            inputs = dropout(inputs,
                             self.encoder_dropout[-1],
                             is_training=self.train)
            loc = fully_connected(inputs,
                                  latent_size,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
            scale = fully_connected(inputs,
                                    latent_size,
                                    weights_initializer=he_init,
                                    activation_fn=tf.nn.softplus,
                                    normalizer_fn=batch_norm,
                                    normalizer_params=bn_params)

            standard_normal = Normal(loc=np.zeros(latent_size,
                                                  dtype=np.float32),
                                     scale=np.ones(latent_size,
                                                   dtype=np.float32))
            e = standard_normal.sample(tf.shape(loc)[0])
            outputs = e * scale + loc

            static_output_size = self.decoder_layer_sizes[0]
            if self.rnn_decoder_layer_sizes:
                time_output_size = self.rnn_decoder_layer_sizes[0] * T1
                output_size = static_output_size + time_output_size
            else:
                output_size = static_output_size
            outputs = fully_connected(outputs,
                                      output_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
            if self.rnn_decoder_layer_sizes:
                outputs, time_outputs = tf.split(
                    outputs, [static_output_size, time_output_size], axis=1)

        with tf.variable_scope('static_decoder'):
            for layer_size, keep_p in zip(self.decoder_layer_sizes,
                                          self.decoder_dropout[:-1]):
                outputs = dropout(outputs, keep_p, is_training=self.train)
                outputs = fully_connected(outputs,
                                          layer_size,
                                          weights_initializer=he_init,
                                          activation_fn=activation,
                                          normalizer_fn=batch_norm,
                                          normalizer_params=bn_params)
            outputs = dropout(outputs,
                              self.decoder_dropout[-1],
                              is_training=self.train)
            outputs = fully_connected(outputs,
                                      D,
                                      weights_initializer=he_init,
                                      activation_fn=None,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)

            X_hat = Bernoulli(logits=outputs)
            self.posterior_predictive = X_hat.sample()
            self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

        if self.rnn_decoder_layer_sizes:
            with tf.variable_scope('rnn_decoder'):
                self.rnn_decoder_layer_sizes.append(D1)
                time_output_size = self.rnn_decoder_layer_sizes[0]
                time_outputs = tf.reshape(time_outputs,
                                          shape=(-1, T1, time_output_size))
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_decoder_dropout)
                    for s in self.rnn_decoder_layer_sizes
                ])
                time_outputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                         time_outputs,
                                                         swap_memory=True,
                                                         time_major=True,
                                                         dtype=tf.float32)
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
                X_hat_time = Bernoulli(logits=time_outputs)
                posterior_predictive_time = X_hat_time.sample()
                posterior_predictive_time = tf.reshape(
                    posterior_predictive_time, shape=(-1, T1, D1))
                self.posterior_predictive_time = tf.transpose(
                    posterior_predictive_time, perm=(1, 0, 2))
                self.posterior_predictive_probs_time = tf.nn.sigmoid(
                    time_outputs)

        kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
        kl_div = tf.reduce_sum(kl_div, axis=1)

        expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)
        X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
        X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))
        if self.rnn_encoder_layer_sizes:
            expected_log_likelihood_time = tf.reduce_sum(
                X_hat_time.log_prob(X_time_reshape), axis=1)
            elbo = -tf.reduce_sum(expected_log_likelihood +
                                  expected_log_likelihood_time - kl_div)
        else:
            elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)
        train_op = tf.train.AdamOptimizer(
            learning_rate=adp_learning_rate).minimize(elbo)

        tf.summary.scalar('elbo', elbo)
        if self.save_file:
            saver = tf.train.Saver()

        if self.tensorboard:
            for v in tf.trainable_variables():
                tf.summary.histogram(v.name, v)
            train_merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter(self.tensorboard)

        self.init_op = tf.global_variables_initializer()
        n = 0
        n_batches = N // batch_sz
        costs = list()
        min_cost = np.inf

        t0 = dt.now()
        with tf.Session() as sess:
            sess.run(self.init_op)
            for epoch in range(epochs):
                idxs = shuffle(range(N))
                X_train = X[idxs]
                X_train_time = X_time[:, idxs]

                for batch in range(n_batches):
                    n += 1
                    X_batch = X_train[batch * batch_sz:(batch + 1) * batch_sz]
                    X_batch_time = X_train_time[:,
                                                batch * batch_sz:(batch + 1) *
                                                batch_sz]

                    sess.run(train_op,
                             feed_dict={
                                 self.X: X_batch,
                                 self.X_time: X_batch_time,
                                 self.rnn_keep_p_encode:
                                 self.rnn_encoder_dropout,
                                 self.rnn_keep_p_decode:
                                 self.rnn_decoder_dropout,
                                 self.train: True,
                                 adp_learning_rate: learning_rate
                             })
                    if n % 100 == 0 and print_progress:
                        cost = sess.run(elbo,
                                        feed_dict={
                                            self.X: X,
                                            self.X_time: X_time,
                                            self.rnn_keep_p_encode: 1.0,
                                            self.rnn_keep_p_decode: 1.0,
                                            self.train: False
                                        })
                        cost /= N
                        costs.append(cost)

                        if adapt_lr and epoch > 0:
                            if cost < min_cost:
                                min_cost = cost
                            elif cost > min_cost * 1.01:
                                learning_rate *= 0.75
                                if print_progress:
                                    print('Updating Learning Rate',
                                          learning_rate)

                        print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)

                        if self.tensorboard:
                            train_sum = sess.run(train_merge,
                                                 feed_dict={
                                                     self.X: X,
                                                     self.X_time: X_time,
                                                     self.rnn_keep_p_encode:
                                                     1.0,
                                                     self.rnn_keep_p_decode:
                                                     1.0,
                                                     self.train: False
                                                 })
                            writer.add_summary(train_sum, n)

                seconds = (dt.now() - t0).seconds
                if seconds > max_seconds:
                    if print_progress:
                        print('Breaking after', seconds, 'seconds')
                    break

            if self.save_file:
                saver.save(sess, self.save_file)

            if self.tensorboard:
                writer.add_graph(sess.graph)

        if show_fig:
            plt.plot(costs)
            plt.title('Costs and Scores')
            plt.show()