Example #1
 def readLine(self, buffer=4096):
     # Keep receiving from the socket until a full '\n'-terminated line is buffered.
     while '\n' not in self.buffer:
         self.buffer += self.recv(buffer).decode()
     # Pop the first complete line; keep any remainder buffered for the next call.
     lines = self.buffer.split('\n')
     line = lines.pop(0)
     self.buffer = '\n'.join(lines)
     return Translator.tensorStringToList(line)
def train():
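    # NOTE: buckets, batch_size, max_len, num_symbols, keep_prob, encoder_inputs,
    # decoder_inputs, target_weights, to_restore and output_path are module-level
    # globals, presumably defined elsewhere in the original file.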
    global BUCKET_ID
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[batch_size],
                           name="weight{0}".format(l)))

    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    # build_generator returns outputs for every bucket, but only the bucket
    # selected by bucket_id is needed; fake_ans: [seq_len * batch_size]
    fake_ans = build_generator(encoder_inputs, decoder_inputs, target_weights,
                               bucket_id, seq_len)
    # build the discriminator
    # true_ans, generated_ans: [max_len, batch_size, num_symbols]

    y_data, y_generated = build_discriminator(
        tf.one_hot(true_ans,
                   num_symbols,
                   on_value=1.0,
                   off_value=0.0,
                   axis=-1,
                   dtype=tf.float32,
                   name="onehot"), fake_ans, keep_prob, seq_len)

    # set up the loss functions
    d_loss = tf.reduce_mean(-(tf.log(y_data) + tf.log(1 - y_generated)))
    g_loss = tf.reduce_mean(-tf.log(y_generated))
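    # (These are the standard GAN objectives, assuming y_data and y_generated are
    # probabilities in (0, 1): the discriminator maximizes log D(real) + log(1 - D(fake)),
    # while the generator maximizes log D(fake).)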

    optimizer = tf.train.AdamOptimizer(0.0001)

    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")

    grad = optimizer.compute_gradients(d_loss, var_list=d_params)
    #print("grad ok")
    # optimization ops for the two models
    d_trainer = optimizer.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer.minimize(g_loss, var_list=g_params)

    init = tf.global_variables_initializer()

    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)
    # launch a session on the default graph
    sess = tf.Session()
    # initialize variables
    sess.run(init)
    #load previous variables
    if to_restore:
        print("reloading variables...")
        chkpt_fname = tf.train.latest_checkpoint(output_path)
        saver.restore(sess, chkpt_fname)
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    steps = 5
    max_epoch = 5
    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')

    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        for j in np.arange(steps):
            print("epoch:%s, iter:%s" % (i, j))
            feed_dict, BUCKET_ID = data_iterator.next()
            sess.run(d_trainer, feed_dict=feed_dict)
        sess.run(g_trainer, feed_dict=feed_dict)
        feed_dict, BUCKET_ID = data_iterator.next()
        #get gen val for the true bucket
        gen_val = sess.run(fake_ans, feed_dict=feed_dict)
        translator.translate_and_print(seq2seq_onehot2label(gen_val))
Example #3
def train():
    global BUCKET_ID
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[batch_size],
                           name="weight{0}".format(l)))

    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    # build_generator returns outputs for every bucket, but only the bucket
    # selected by bucket_id is needed; fake_ans: [seq_len * batch_size]
    fake_ans = build_generator(encoder_inputs, decoder_inputs, target_weights,
                               bucket_id, seq_len)
    # build the discriminator
    # true_ans: [max_len, batch_size]
    # generated_ans: [max_len, batch_size, num_symbols]
    y_data, y_generated = build_discriminator(true_ans, fake_ans, keep_prob,
                                              seq_len)

    # set up the loss functions
    #d_loss_real = tf.reduce_mean(tf.scalar_mul(-1,y_data))
    d_loss_real = tf.reduce_mean(y_data)
    d_loss_fake = tf.reduce_mean(y_generated)
    #d_loss = d_loss_fake + d_loss_real
    #d_loss = tf.reduce_mean(y_generated - y_data)
    d_loss = d_loss_fake - d_loss_real
    g_loss = tf.reduce_mean(tf.scalar_mul(-1, y_generated))
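    # WGAN objectives: the critic minimizes E[D(fake)] - E[D(real)], i.e. it
    # maximizes the score gap between real and generated answers, while the
    # generator minimizes -E[D(fake)].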

    optimizer_dis = tf.train.RMSPropOptimizer(learning_rate_dis,
                                              name='RMSProp_dis')
    optimizer_gen = tf.train.RMSPropOptimizer(learning_rate_gen,
                                              name='RMSProp_gen')

    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")

    #print(d_params)
    #print(g_params)
    #grad = optimizer.compute_gradients(d_loss, var_list=d_params)
    # optimization ops for the two models
    d_trainer = optimizer_dis.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer_gen.minimize(g_loss, var_list=g_params)

    #clip discrim weights
    d_clip = [
        tf.assign(v, tf.clip_by_value(v, CLIP_RANGE[0], CLIP_RANGE[1]))
        for v in d_params
    ]
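    # Clipping the critic weights into CLIP_RANGE after every update is the
    # original WGAN heuristic for keeping the critic (roughly) Lipschitz.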

    init = tf.global_variables_initializer()
    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)
    # launch a session on the default graph
    #config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    #config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session()
    #sess = tf.Session(config=config)
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # initialize variables
    sess.run(init)
    sess.run(d_clip)
    #load previous variables
    if to_restore:
        print("reloading variables...")
        logging.debug("reloading variables...")
        ckpt = tf.train.get_checkpoint_state(output_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')
    print("save ckpt")
    saver.save(sess,
               os.path.join(output_path, 'model.ckpt'),
               global_step=global_step)
    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        if i < 15 or i % 500 == 0:
            citers = 10
        else:
            citers = CRITIC
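        # Run extra critic iterations early in training and on every 500th epoch,
        # following the usual WGAN schedule of several critic updates per generator update.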
        for j in np.arange(citers):
            print("epoch:%s, dis iter:%s" % (i, j))
            logging.debug("epoch:%s, dis iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            #except:
            except StopIteration:
                print("out of feed")
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            _, dis_loss, fake_value, true_value = sess.run(
                [d_trainer, d_loss, d_loss_fake, d_loss_real],
                feed_dict=feed_dict)
            sess.run(d_clip)
            print("d_loss:{}".format(dis_loss))
            print("fake:{} true:{}".format(fake_value, true_value))
            logging.debug("d_loss:{}".format(dis_loss))
            logging.debug("fake:{} true:{}".format(fake_value, true_value))

        for j in np.arange(gen_critic):
            print("epoch:%s, gen iter:%s" % (i, j))
            logging.debug("epoch:%s, gen iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                print("out of feed")
                logging.debug("out of feed")
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()

            g_loss_val, _, d_loss_val = sess.run([g_loss, g_trainer, d_loss],
                                                 feed_dict=feed_dict)
            logging.debug("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))
            print("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))

        #get gen val for the true bucket
        gen_val = sess.run(fake_ans, feed_dict=feed_dict)
        translator.translate_and_print(seq2seq_onehot2label(gen_val),
                                       logger=logging)
        print("save ckpt")
        logging.debug("save ckpt")
        saver.save(sess,
                   os.path.join(output_path, 'model.ckpt'),
                   global_step=global_step)
Example #4
import os
import re
import sys
import glob

from utils import Translator

with open('key.txt') as file:
    lines = file.readlines()

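# take the last non-comment line of key.txt as the API key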
for line in lines:
    if '#' not in line:
        key = line.strip()

translator = Translator(key=key)


def vid_down(url):
    os.system(
        "youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' --write-thumbnail --write-sub --write-auto-sub {}"
        .format(url))


def vtt2srt(file_src_path):

    file_tar_path = file_src_path.replace('.en.vtt', '.en.srt')

    file_src = open(file_src_path)
    file_tar = open(file_tar_path, 'w')

    content_src = file_src.readlines()
Example #5
 def GetResources(self):
     from utils import Paths, Translator
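     # Resource dict for a GUI command (icon, menu label, tooltip); Translator
     # presumably supplies the translated UI strings here.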
     IconPath = Paths.iconsPath() + "/drilling.png"
     MenuText = str(Translator.translate('Drilling'))
     ToolTip = str(Translator.translate('Create a drilling op'))
     return {'Pixmap': IconPath, 'MenuText': MenuText, 'ToolTip': ToolTip}
Example #7
 def writeln(self, message):
     assert isinstance(message, (str, list)), \
         "Input %s <%s> has to be string or list!" % (message, type(message))
     msg = Translator.listToTensorString(message) if isinstance(message, list) else message
     self.sendall(str.encode(msg + '\n'))
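# writeln is the sending counterpart of readLine in Example #1: it serializes a
# list via Translator.listToTensorString and frames each message with a trailing '\n'.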
def train():
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32,
                           shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[batch_size],
                           name="weight{0}".format(l)))

    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    #[seq_len * batch_size]
    with tf.variable_scope('generator'):
        fake_ans = generator(encoder_inputs, decoder_inputs, target_weights,
                             bucket_id, seq_len)
    # build the discriminator
    true_ans_one_hot = tf.one_hot(true_ans,
                                  num_symbols,
                                  on_value=1.0,
                                  off_value=0.0,
                                  axis=-1)
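    # One-hot encode the ground-truth answers so the real data has the same
    # [max_len, batch_size, num_symbols] shape as the generator's output.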

    y_data = discriminator(true_ans_one_hot,
                           keep_prob=keep_prob,
                           seq_len=seq_len)
    y_generated = discriminator(fake_ans,
                                keep_prob=keep_prob,
                                seq_len=seq_len,
                                reuse=True)
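    # reuse=True makes the second call share the first call's variables, so the
    # same discriminator scores both the real and the generated answers.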

    # set up the loss functions
    d_loss_real = tf.reduce_mean(y_data)
    d_loss_fake = tf.reduce_mean(y_generated)
    d_loss = d_loss_fake - d_loss_real
    g_loss = tf.reduce_mean(-y_generated)

    optimizer_dis = tf.train.RMSPropOptimizer(learning_rate_dis,
                                              name='RMSProp_dis')
    optimizer_gen = tf.train.RMSPropOptimizer(learning_rate_gen,
                                              name='RMSProp_gen')

    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")

    d_trainer = optimizer_dis.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer_gen.minimize(g_loss, var_list=g_params)

    #clip discrim weights
    d_clip = [
        tf.assign(v, tf.clip_by_value(v, CLIP_RANGE[0], CLIP_RANGE[1]))
        for v in d_params
    ]

    init = tf.global_variables_initializer()
    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)

    sess = tf.Session()

    sess.run(init)
    sess.run(d_clip)
    #load previous variables
    if to_restore:
        print("reloading variables...")
        logging.debug("reloading variables...")
        ckpt = tf.train.get_checkpoint_state(output_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')
    print("save ckpt")
    saver.save(sess,
               os.path.join(output_path, 'refine_model.ckpt'),
               global_step=global_step)
    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        if i < 25 or i % 500 == 0:
            citers = 100
        else:
            citers = CRITIC
        for j in np.arange(citers):
            print("epoch:%s, dis iter:%s" % (i, j))
            logging.debug("epoch:%s, dis iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            _, dis_loss, fake_value, true_value = sess.run(
                [d_trainer, d_loss, d_loss_fake, d_loss_real],
                feed_dict=feed_dict)
            sess.run(d_clip)
            print("d_loss:{}".format(dis_loss))
            print("fake:{} true:{}".format(fake_value, true_value))
            logging.debug("d_loss:{}".format(dis_loss))
            logging.debug("fake:{} true:{}".format(fake_value, true_value))

        for j in np.arange(gen_critic):
            print("epoch:%s, gen iter:%s" % (i, j))
            logging.debug("epoch:%s, gen iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()

            g_loss_val, _, d_loss_val = sess.run([g_loss, g_trainer, d_loss],
                                                 feed_dict=feed_dict)
            logging.debug("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))
            print("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))

        #get gen val for the true bucket
        gen_val = sess.run([fake_ans], feed_dict=feed_dict)
        print(gen_val)
        #translator.translate_and_print(seq2seq_onehot2label(gen_val),logger = logging)
        print("save ckpt")
        logging.debug("save ckpt")
        saver.save(sess,
                   os.path.join(output_path, 'model.ckpt'),
                   global_step=global_step)