Example #1
def get_loss(opt):
    # encode audio feature into logits for CTC decoding
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)

    # CTC loss for this GPU tower
    loss = logit.sg_ctc(target=opt.target[opt.gpu_index],
                        seq_len=opt.seq_len[opt.gpu_index])

    # add the collected regularization losses, scaled by 0.03 (weight decay)
    loss += tf.sg_regularizer_loss(0.03)
    return loss
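
A minimal usage sketch for get_loss: the per-GPU input/target/seq_len tensors and num_gpus below are illustrative placeholders; in the original project they come from the input pipeline, which is not shown here.

# hypothetical wiring: one CTC loss per GPU tower
tower_losses = []
for i in range(num_gpus):
    tower_opt = tf.sg_opt(input=inputs_per_gpu, target=targets_per_gpu,
                          seq_len=seq_lens_per_gpu, gpu_index=i)
    tower_losses.append(get_loss(tower_opt))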
Example #2
def sg_optim(loss, **kwargs):
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp',
                     lr=0.001,
                     beta1=0.9,
                     beta2=0.99,
                     category='')

    # select optimizer (fall back to plain SGD for any other name)
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr,
                                                beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr,
                                               beta1=opt.beta1,
                                               beta2=opt.beta2)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables whose name starts with the given category prefix
    var_list = [
        t for t in tf.trainable_variables()
        if t.name.startswith(opt.category)
    ]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summary (compute_gradients returns (gradient, variable) pairs)
    for g, v in gradient:
        tf.sg_summary_gradient(v, g)

    # gradient update op
    return optim.apply_gradients(gradient, global_step=tf.sg_global_step())
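
A hedged usage sketch for this simplified sg_optim: the prefix 'encoder' is illustrative, and loss is assumed to be a scalar tensor built elsewhere.

# update only variables whose names start with 'encoder'
train_op = sg_optim(loss, optim='AdaMax', lr=0.0005, category='encoder')
# inside a session loop: sess.run(train_op)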
Example #3
def generate():
    dev = '/cpu:0'
    with tf.device(dev):
        mydir = 'tfrc150char_wrd0704'
        tfrecords_filename = [join(mydir, 'short_infer3.tfrecords')]
        tfrecords_filename_inf = [join(mydir, '11_3.tfrecords')]

        print(tfrecords_filename)
        filename_queue = tf.train.string_input_producer(tfrecords_filename,
                                                        num_epochs=num_epochs,
                                                        shuffle=True,
                                                        capacity=1)
        infer_queue = tf.train.string_input_producer(tfrecords_filename_inf,
                                                     num_epochs=num_epochs,
                                                     shuffle=True,
                                                     capacity=1)

        # build the shared decoder / context LSTM cells used for inference
        with tf.variable_scope("dec_lstm") as scp:
            dec_cell = BasicLSTMCell2(Hp.w_emb_size,
                                      Hp.rnn_hd,
                                      state_is_tuple=True)

        with tf.variable_scope("contx_lstm") as scp:
            cell = BasicLSTMCell2(Hp.hd, Hp.rnn_hd, state_is_tuple=True)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                input_keep_prob=Hp.keep_prob,
                output_keep_prob=Hp.keep_prob)

        (words, chars) = read_and_decode(filename_queue,
                                         Hp.batch_size * Hp.num_gpus)

        words_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=words)
        chars_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=chars)

        word_emb = np.loadtxt("glove300d_0704.txt")
        Hp.word_vs = word_emb.shape[0]

        # --------------------------------------------------------------------------------
        with tf.name_scope('%s_%d' % ("tower", 0)) as scope:
            rnn_state = tower_infer_enc(chars_splits[0],
                                        scope,
                                        rnn_cell,
                                        dec_cell,
                                        word_emb,
                                        out_reuse_vars=False,
                                        dev='/cpu:0')

            chars_pl = tf.placeholder(tf.int32, shape=(None, Hp.c_maxlen))
            rnn_state_pl1 = [
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd)),
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd))
            ]
            rnn_state_pl = tf.contrib.rnn.LSTMStateTuple(
                rnn_state_pl1[0], rnn_state_pl1[1])

            final_ids, rnn_state_dec = tower_infer_dec(chars_pl,
                                                       scope,
                                                       rnn_cell,
                                                       dec_cell,
                                                       word_emb,
                                                       rnn_state_pl,
                                                       out_reuse_vars=False,
                                                       dev='/cpu:0')

        # --------------------------------------------------------------------------------

        saver = tf.train.Saver(tf.trainable_variables())
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.94

        session_config.gpu_options.allow_growth = False

        restore_dir = 'tnsrbrd/hin17d08m_1313g2'  #   lec30d07m_1634g2   lec04d07m_2006g2     lec28d07m_1221g2    lec31d07m_1548g2
        csv_file = join(restore_dir, time.strftime("hin%dd%mm_%H%M.csv"))
        csv_f = open(csv_file, 'a')
        csv_writer = csv.writer(csv_f)

        with tf.Session(config=session_config) as sess:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            # start input queue runners once, under a coordinator
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # restore weights from the latest checkpoint
            saver.restore(sess,
                          tf.train.latest_checkpoint(join(restore_dir,
                                                          'last_chpt')))

            for ep in range(num_epochs):

                tf.sg_set_infer(sess)
                rnn_state_val, w_txt, ch_txt = sess.run(
                    [rnn_state, words_splits[0], chars_splits[0]],
                    feed_dict={Hp.keep_prob: 1.0})

                predictions = []
                for idx in range(3):
                    char_inpt = word2char_ids(
                        ids_val) if idx != 0 else ch_txt[:, 2, :]
                    ids_val, rnn_state_val = sess.run(
                        [final_ids, rnn_state_dec],
                        feed_dict={
                            Hp.keep_prob: 1.0,
                            rnn_state_pl1[0]: rnn_state_val[0],
                            rnn_state_pl1[1]: rnn_state_val[1],
                            chars_pl: char_inpt
                        })
                    temp = np.zeros((Hp.batch_size, Hp.w_maxlen))
                    for b in range(Hp.batch_size):
                        stop_ind = np.where(ids_val[b] == 2)[0]
                        if stop_ind.size > 0:
                            stop_ind = stop_ind[0]
                            ids_val[b, stop_ind + 1:] = 0
                    temp[:, :ids_val.shape[1]] = ids_val
                    predictions.append(temp)

                # predictions are decode_sent x b x w_maxlen
                predictions = np.array(predictions)
                in_batches = [w_txt[b, :, :] for b in range(Hp.batch_size)]
                res_batches = [
                    predictions[:, b, :] for b in range(Hp.batch_size)
                ]

                for b in range(Hp.batch_size):
                    in_paragraph = idxword2txt(in_batches[b])
                    print("\n INPUT SAMPLE \n")
                    print(in_paragraph)

                    res_paragraph = idxword2txt(res_batches[b])
                    print("\n RESULTS \n")
                    print(res_paragraph)

                    csv_writer.writerow([
                        " ".join(in_paragraph[:3]), " ".join(in_paragraph[3:]),
                        " ".join(res_paragraph)
                    ])

            csv_f.close()
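
generate() relies on several project helpers that are not shown (read_and_decode, tower_infer_enc/tower_infer_dec, word2char_ids, idxword2txt). As an illustration only, here is a minimal sketch of what idxword2txt might look like, assuming a global index-to-word list idx2word, id 0 for padding, and id 2 for end-of-sentence (matching the stop condition used in the decoding loop above); the project's real helper may differ.

def idxword2txt(idx_matrix):
    # convert a [num_sentences, max_len] matrix of word ids into sentence strings
    sentences = []
    for row in idx_matrix:
        words = []
        for idx in row:
            if idx == 0:      # assumed padding id
                continue
            if idx == 2:      # assumed end-of-sentence id
                break
            words.append(idx2word[int(idx)])
        sentences.append(" ".join(words))
    return sentences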
Example #4
def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.
    Args:
        loss: A 0-D `Tensor` containing the value to minimize, or a list of 0-D `Tensor`s (one per GPU tower) for the multi-GPU case.
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          momentum : A Python Scalar for RMSProp optimizer (optional). Default is 0.
          category: A string or string list. Specifies the variables that should be trained (optional).
            Only if the name of a trainable variable starts with `category`, its value is updated.
            Default is '', which means all trainable variables are updated.
          clip_grad_norm: A Python Scalar (optional). If given, gradients are clipped to this global norm
            before being applied; if omitted, no clipping is performed.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, momentum=0., category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'RMSProp':
        optim = tf.train.RMSPropOptimizer(learning_rate=opt.lr, decay=opt.beta1, momentum=opt.momentum)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    #
    # calc gradient
    #

    # multiple GPUs case
    if isinstance(loss, (tuple, list)):
        gradients = []
        # loop for each GPU tower
        for i, loss_ in enumerate(loss):
            # specify device
            with tf.device('/gpu:%d' % i):
                # give a new name scope only to the operations of this tower
                with tf.name_scope('gpu_%d' % i):
                    # add gradient calculation operation for each GPU tower
                    gradients.append(tf.gradients(loss_, var_list))

        # averaging gradient
        gradient = []
        for grad in zip(*gradients):
            gradient.append(tf.add_n(grad) / len(loss))
    # single GPU case
    else:
        gradient = tf.gradients(loss, var_list)

    # clip gradients by global norm, but only when a clipping threshold was given
    if opt.clip_grad_norm:
        gradient, _ = tf.clip_by_global_norm(gradient, opt.clip_grad_norm)

    # gradient update op
    with tf.device('/gpu:0'):
        grad_var = [(g, v) for g, v in zip(gradient, var_list)]
        grad_op = optim.apply_gradients(grad_var, global_step=tf.sg_global_step())

    # add summary using last tower value
    for g, v in grad_var:
        # exclude batch-norm statistics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
            tf.sg_summary_gradient(v, g)

    # extra update ops within the category (for example, batch-norm running statistics updates)
    if isinstance(opt.category, (tuple, list)):
        update_op = []
        for cat in opt.category:
            update_op.extend([t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(cat)])
    else:
        update_op = [t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(opt.category)]

    return tf.group(*([grad_op] + update_op))
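
A hedged usage sketch for the multi-GPU path of this sg_optim: the per-tower losses below are assumed to have been built by the caller (one scalar loss per GPU, not shown), and clip_grad_norm is optional.

# average gradients over two tower losses and update encoder/decoder variables only
train_op = sg_optim([loss_tower_0, loss_tower_1],
                    optim='Adam', lr=1e-4,
                    clip_grad_norm=5.0,
                    category=('encoder', 'decoder'))
# sess.run(train_op) also runs any UPDATE_OPS (e.g. batch-norm statistics)
# whose names match the given categories.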
Example #5
def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
        loss: A 0-D `Tensor` containing the value to minimize.
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'DP_GD', or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          category: A string or string list. Specifies the variables that should be trained (optional).
            Only if the name of a trainable variable starts with `category`, its value is updated.
            Default is '', which means all trainable variables are updated.
          eps, delta, sigma, gaussian_sanitizer, batches_per_lot: Differential-privacy
            settings, used only when `optim` is 'DP_GD'.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'DP_GD':
        optim = tf.sg_optimize.DPGradientDescentOptimizer(
                    opt.lr,
                    [opt.eps, opt.delta],
                    opt.gaussian_sanitizer,
                    sigma=opt.sigma,
                    batches_per_lot=opt.batches_per_lot)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    if opt.optim == 'DP_GD':
        # differentially private SGD: compute sanitized (clipped and noised) gradients.
        # this code path only handles one batch per lot.
        sanitized_grads = optim.compute_sanitized_gradients(loss,
                                                            var_list=var_list)

        for v, g in zip(var_list, sanitized_grads):
            # exclude batch-norm statistics
            if 'mean' not in v.name and 'variance' not in v.name \
                    and 'beta' not in v.name and 'gamma' not in v.name:
                tf.sg_summary_gradient(v, g)

        # apply_gradients expects (gradient, variable) pairs
        grad_op = optim.apply_gradients(list(zip(sanitized_grads, var_list)),
                                        global_step=tf.sg_global_step())
    else:
        # calc gradient
        gradient = optim.compute_gradients(loss, var_list=var_list)

        # add summary (compute_gradients returns (gradient, variable) pairs)
        for g, v in gradient:
            # exclude batch-norm statistics
            if 'mean' not in v.name and 'variance' not in v.name \
                    and 'beta' not in v.name and 'gamma' not in v.name:
                tf.sg_summary_gradient(v, g)

        # gradient update op
        grad_op = optim.apply_gradients(gradient, global_step=tf.sg_global_step())

    # extra update ops within the category (for example, batch-norm running statistics updates)
    if isinstance(opt.category, (tuple, list)):
        update_op = []
        for cat in opt.category:
            update_op.extend([t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(cat)])
    else:
        update_op = [t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(opt.category)]

    return [grad_op] + update_op
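
Since this variant returns a Python list ([grad_op] + update_op), the caller runs the whole list in one sess.run. The sketch below is illustrative only: the DP_GD keyword values are placeholders, and building the gaussian_sanitizer object depends on the external differential-privacy library used by DPGradientDescentOptimizer, which is not shown here.

# ordinary training: run the gradient op plus matching UPDATE_OPS each step
train_ops = sg_optim(loss, optim='Adam', lr=1e-4)
sess.run(train_ops)

# differentially private training (placeholder values; `sanitizer` must be
# constructed with the external DP library, not shown)
dp_ops = sg_optim(loss, optim='DP_GD', lr=0.05,
                  eps=1.0, delta=1e-5, sigma=4.0,
                  gaussian_sanitizer=sanitizer, batches_per_lot=1)
sess.run(dp_ops)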
Example #6
def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
        loss: A 0-D `Tensor` containing the value to minimize.
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          category: A string or string list. Specifies the variables that should be trained (optional).
            Only if the name of a trainable variable starts with `category`, its value is updated.
            Default is '', which means all trainable variables are updated.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp',
                     lr=0.001,
                     beta1=0.9,
                     beta2=0.99,
                     category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr,
                                                beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr,
                                               beta1=opt.beta1,
                                               beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr,
                                       beta1=opt.beta1,
                                       beta2=opt.beta2)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([
                t for t in tf.trainable_variables() if t.name.startswith(cat)
            ])
    else:
        var_list = [
            t for t in tf.trainable_variables()
            if t.name.startswith(opt.category)
        ]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summary (compute_gradients returns (gradient, variable) pairs)
    for g, v in gradient:
        # exclude batch-norm statistics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
            tf.sg_summary_gradient(v, g)

    # gradient update op
    return optim.apply_gradients(gradient, global_step=tf.sg_global_step())
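
Variable selection in sg_optim is a plain name-prefix match, so the category usually corresponds to a variable scope. A small illustrative sketch (the scope name 'discriminator' and the tensors x and d_loss are hypothetical):

with tf.variable_scope('discriminator'):
    logit = x.sg_dense(dim=1)   # variables created here are named 'discriminator/...'

# only the discriminator's variables receive gradient updates
d_train_op = sg_optim(d_loss, optim='MaxProp', lr=0.001, category='discriminator')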
Example #7
logit = (skip
         .sg_conv1d(size=1, act='tanh', bn=True, dout=0.25)
         .sg_conv1d(size=1, act='tanh', bn=True, dim=20, dout=0.25)
         .sg_conv1d(size=1, act='tanh', bn=True, dim=2, dout=0.25)
         .sg_reshape(shape=(batch_size, 200))
         .sg_dense(in_dim=200, dim=50, act='relu', dout=0.20)
         .sg_dense(in_dim=50, dim=10, act='relu', dout=0.20)
         .sg_dense(in_dim=10, dim=2, dout=0.20))

# alternative CTC loss (not used here):
# loss = logit.sg_ctc(target=y, seq_len=seq_len)
reg_lambda = 0.0002
trainable = tf.trainable_variables()
lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in trainable]) * reg_lambda
loss = logit.sg_ce(target=y, one_hot=True) + lossL2

# train
config = tf.ConfigProto(allow_soft_placement=True,
                        inter_op_parallelism_threads=6,
                        intra_op_parallelism_threads=6)
sess = tf.Session(config=config)
tf.sg_init(sess)

learning_rate = tf.train.exponential_decay(0.00001,
                                           tf.sg_global_step(),
                                           100,
                                           0.95,
                                           staircase=False)
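
The snippet ends before a training op is built. As an assumed continuation only (not part of the original example), the decayed learning rate and the regularized loss could be wired together with the sg_optim pattern shown earlier; if x and y are placeholders rather than queue tensors, a feed_dict would also be needed in the run calls.

# assumed continuation: MaxProp update driven by the decayed learning rate
train_op = sg_optim(loss, optim='MaxProp', lr=learning_rate)

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for step in range(10000):
    sess.run(train_op)
coord.request_stop()
coord.join(threads)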