def readLine(self, buffer=4096):
    while '\n' not in self.buffer:
        self.buffer += self.recv(buffer).decode()
    lines = self.buffer.split('\n')
    line = lines.pop(0)
    self.buffer = '\n'.join(lines)
    return Translator.tensorStringToList(line)
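# A minimal, hedged sketch of the buffering pattern readLine implements: decoded
# chunks accumulate in a string buffer until a newline arrives, one line is
# consumed, and the remainder is kept for the next call. Names below are
# illustrative only; Translator.tensorStringToList is assumed to parse the line.
def pop_line(buf, chunks):
    """buf: leftover text from the last call; chunks: iterator of decoded recv() results."""
    while '\n' not in buf:
        buf += next(chunks)
    line, _, rest = buf.partition('\n')
    return line, rest

line, leftover = pop_line('', iter(['1 2 ', '3\n4 5']))
assert line == '1 2 3' and leftover == '4 5'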
def train():
    global BUCKET_ID
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32, shape=[batch_size],
                           name="weight{0}".format(l)))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    # build_generator returns a list over buckets, but only one bucket is needed
    # [seq_len * batch_size]
    fake_ans = build_generator(encoder_inputs, decoder_inputs, target_weights,
                               bucket_id, seq_len)

    # Build the discriminator
    # true_ans, generated_ans: [max_len, batch_size, num_symbols]
    y_data, y_generated = build_discriminator(
        tf.one_hot(true_ans, num_symbols, on_value=1.0, off_value=0.0,
                   axis=-1, dtype=tf.float32, name="onehot"),
        fake_ans, keep_prob, seq_len)

    # Loss functions (standard GAN objective)
    d_loss = -(tf.log(y_data) + tf.log(1 - y_generated))
    g_loss = -tf.log(y_generated)

    optimizer = tf.train.AdamOptimizer(0.0001)
    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")
    gard = optimizer.compute_gradients(d_loss, var_list=d_params)
    #print("gard ok")

    # Optimization ops for the two models
    d_trainer = optimizer.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer.minimize(g_loss, var_list=g_params)

    init = tf.initialize_all_variables()

    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)

    # Launch the default graph
    sess = tf.Session()
    # Initialize variables
    sess.run(init)

    # load previous variables
    if to_restore:
        print("reloading variables...")
        chkpt_fname = tf.train.latest_checkpoint(output_path)
        saver.restore(sess, chkpt_fname)

    if not os.path.exists(output_path):
        os.mkdir(output_path)

    steps = 5
    max_epoch = 5
    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')

    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        for j in np.arange(steps):
            print("epoch:%s, iter:%s" % (i, j))
            feed_dict, BUCKET_ID = data_iterator.next()
            sess.run(d_trainer, feed_dict=feed_dict)
            sess.run(g_trainer, feed_dict=feed_dict)
        feed_dict, BUCKET_ID = data_iterator.next()
        # get generator output for the current bucket and decode it
        gen_val = sess.run(fake_ans, feed_dict=feed_dict)
        translator.translate_and_print(seq2seq_onehot2label(gen_val))
def train():
    global BUCKET_ID
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32, shape=[batch_size],
                           name="weight{0}".format(l)))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    # build_generator returns a list over buckets, but only one bucket is needed
    # [seq_len * batch_size]
    fake_ans = build_generator(encoder_inputs, decoder_inputs, target_weights,
                               bucket_id, seq_len)

    # Build the discriminator
    # true_ans: [max_len, batch_size]
    # generated_ans: [max_len, batch_size, num_symbols]
    y_data, y_generated = build_discriminator(true_ans, fake_ans, keep_prob,
                                              seq_len)

    # Loss functions (WGAN critic/generator objectives)
    #d_loss_real = tf.reduce_mean(tf.scalar_mul(-1, y_data))
    d_loss_real = tf.reduce_mean(y_data)
    d_loss_fake = tf.reduce_mean(y_generated)
    #d_loss = d_loss_fake + d_loss_real
    #d_loss = tf.reduce_mean(y_generated - y_data)
    d_loss = d_loss_fake - d_loss_real
    g_loss = tf.reduce_mean(tf.scalar_mul(-1, y_generated))

    optimizer_dis = tf.train.RMSPropOptimizer(learning_rate_dis,
                                              name='RMSProp_dis')
    optimizer_gen = tf.train.RMSPropOptimizer(learning_rate_gen,
                                              name='RMSProp_gen')
    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")
    #print(d_params)
    #print(g_params)
    #gard = optimizer.compute_gradients(d_loss, var_list=d_params)

    # Optimization ops for the two models
    d_trainer = optimizer_dis.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer_gen.minimize(g_loss, var_list=g_params)

    # clip discriminator weights
    d_clip = [
        tf.assign(v, tf.clip_by_value(v, CLIP_RANGE[0], CLIP_RANGE[1]))
        for v in d_params
    ]

    init = tf.global_variables_initializer()

    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)

    # Launch the default graph
    #config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    #config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session()
    #sess = tf.Session(config=config)
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Initialize variables and apply the first weight clip
    sess.run(init)
    sess.run(d_clip)

    # load previous variables
    if to_restore:
        print("reloading variables...")
        logging.debug("reloading variables...")
        ckpt = tf.train.get_checkpoint_state(output_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    if not os.path.exists(output_path):
        os.mkdir(output_path)

    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')

    print("save ckpt")
    saver.save(sess, os.path.join(output_path, 'model.ckpt'),
               global_step=global_step)

    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        # train the critic more aggressively early on and periodically
        if i < 15 or i % 500 == 0:
            citers = 10
        else:
            citers = CRITIC

        for j in np.arange(citers):
            print("epoch:%s, dis iter:%s" % (i, j))
            logging.debug("epoch:%s, dis iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            #except:
            except StopIteration:
                print("out of feed")
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            _, dis_loss, fake_value, true_value = sess.run(
                [d_trainer, d_loss, d_loss_fake, d_loss_real],
                feed_dict=feed_dict)
            sess.run(d_clip)
            print("d_loss:{}".format(dis_loss))
            print("fake:{} true:{}".format(fake_value, true_value))
            logging.debug("d_loss:{}".format(dis_loss))
            logging.debug("fake:{} true:{}".format(fake_value, true_value))

        for j in np.arange(gen_critic):
            print("epoch:%s, gen iter:%s" % (i, j))
            logging.debug("epoch:%s, gen iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                print("out of feed")
                logging.debug("out of feed")
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            g_loss_val, _, d_loss_val = sess.run([g_loss, g_trainer, d_loss],
                                                 feed_dict=feed_dict)
            logging.debug("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))
            print("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))

        # get generator output for the current bucket and decode it
        gen_val = sess.run(fake_ans, feed_dict=feed_dict)
        translator.translate_and_print(seq2seq_onehot2label(gen_val),
                                       logger=logging)

        print("save ckpt")
        logging.debug("save ckpt")
        saver.save(sess, os.path.join(output_path, 'model.ckpt'),
                   global_step=global_step)
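# seq2seq_onehot2label is called above but not defined in this file. A minimal,
# hedged sketch of what it is assumed to do, given the shapes noted above
# ([max_len, batch_size, num_symbols] generator output): take the argmax over
# the symbol axis and return batch-major token-id sequences for the Translator.
import numpy as np

def seq2seq_onehot2label(onehot_seq):
    arr = np.asarray(onehot_seq)        # [max_len, batch_size, num_symbols]
    ids = np.argmax(arr, axis=-1)       # [max_len, batch_size] token ids
    return ids.T.tolist()               # one id sequence per batch element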
import os
import re
import sys
import glob

from utils import Translator

# read the API key from key.txt; lines containing '#' are treated as comments
with open('key.txt') as file:
    lines = file.readlines()
for line in lines:
    if '#' not in line:
        key = line.strip()
translator = Translator(key=key)


def vid_down(url):
    os.system(
        "youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4' --write-thumbnail --write-sub --write-auto-sub {}"
        .format(url))


def vtt2srt(file_src_path):
    file_tar_path = file_src_path.replace('.en.vtt', '.en.srt')
    file_src = open(file_src_path)
    file_tar = open(file_tar_path, 'w')
    content_src = file_src.readlines()
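# vtt2srt above is cut off after reading the source lines. A hedged sketch of
# the conversion step such a function typically needs (an assumption, not the
# original code): rewrite WebVTT "HH:MM:SS.mmm" timestamps into SRT's
# comma-decimal form.
import re

def vtt_line_to_srt(line):
    # 00:00:01.000 --> 00:00:04.000  becomes  00:00:01,000 --> 00:00:04,000
    return re.sub(r'(\d{2}:\d{2}:\d{2})\.(\d{3})', r'\1,\2', line)

assert vtt_line_to_srt('00:00:01.000 --> 00:00:04.000') == \
    '00:00:01,000 --> 00:00:04,000'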
def GetResources(self):
    from utils import Paths, Translator
    IconPath = Paths.iconsPath() + "/drilling.png"
    MenuText = str(Translator.translate('Drilling'))
    ToolTip = str(Translator.translate('Create a drilling op'))
    return {'Pixmap': IconPath, 'MenuText': MenuText, 'ToolTip': ToolTip}
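# Hedged sketch of the FreeCAD command-class pattern GetResources belongs to:
# the dict it returns supplies the toolbar icon, menu text and tooltip once the
# command is registered with FreeCADGui.addCommand. The class and command names
# here are illustrative, not taken from the original module.
import FreeCADGui

class _CommandDrilling:
    def GetResources(self):
        from utils import Paths, Translator
        return {'Pixmap': Paths.iconsPath() + "/drilling.png",
                'MenuText': str(Translator.translate('Drilling')),
                'ToolTip': str(Translator.translate('Create a drilling op'))}

    def Activated(self):
        pass  # the drilling op would be created here

FreeCADGui.addCommand('Cam_Drilling', _CommandDrilling())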
def writeln(self, message):
    assert isinstance(message, (str, list)), \
        "Input %s <%s> has to be string or list!" % (message, type(message))
    msg = (Translator.listToTensorString(message)
           if isinstance(message, list) else message)
    self.sendall(str.encode(msg + '\n'))
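# Hedged usage sketch of the newline-delimited framing that writeln and readLine
# form together, round-tripped over socket.socketpair (Unix-like platforms).
# The Translator string helpers are stood in for by a plain string.
import socket

def send_line(sock, text):
    sock.sendall((text + '\n').encode())

def recv_line(sock, chunk=4096):
    buf = ''
    while '\n' not in buf:
        buf += sock.recv(chunk).decode()
    return buf.split('\n', 1)[0]

a, b = socket.socketpair()
send_line(a, '0.1 0.2 0.3')
print(recv_line(b))  # -> 0.1 0.2 0.3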
def train():
    for l in xrange(buckets[-1][0]):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="encoder{0}".format(l)))
    for l in xrange(buckets[-1][1]):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[batch_size],
                           name="decoder{0}".format(l)))
        target_weights.append(
            tf.placeholder(tf.float32, shape=[batch_size],
                           name="weight{0}".format(l)))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    true_ans = tf.placeholder(tf.int32, [max_len, batch_size], name="true_ans")
    seq_len = tf.placeholder(tf.int32, name="seq_len")
    bucket_id = tf.placeholder(tf.int32, name="bucket_id")

    # [seq_len * batch_size]
    with tf.variable_scope('generator'):
        fake_ans = generator(encoder_inputs, decoder_inputs, target_weights,
                             bucket_id, seq_len)

    # Build the discriminator (shared weights, reused for the fake branch)
    true_ans_one_hot = tf.one_hot(true_ans, num_symbols, on_value=1.0,
                                  off_value=0.0, axis=-1)
    y_data = discriminator(true_ans_one_hot, keep_prob=keep_prob,
                           seq_len=seq_len)
    y_generated = discriminator(fake_ans, keep_prob=keep_prob,
                                seq_len=seq_len, reuse=True)

    # Loss functions (WGAN critic/generator objectives)
    d_loss_real = tf.reduce_mean(y_data)
    d_loss_fake = tf.reduce_mean(y_generated)
    d_loss = d_loss_fake - d_loss_real
    g_loss = tf.reduce_mean(-y_generated)

    optimizer_dis = tf.train.RMSPropOptimizer(learning_rate_dis,
                                              name='RMSProp_dis')
    optimizer_gen = tf.train.RMSPropOptimizer(learning_rate_gen,
                                              name='RMSProp_gen')
    d_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="discriminator")
    g_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope="generator")

    d_trainer = optimizer_dis.minimize(d_loss, var_list=d_params)
    g_trainer = optimizer_gen.minimize(g_loss, var_list=g_params)

    # clip discriminator weights
    d_clip = [
        tf.assign(v, tf.clip_by_value(v, CLIP_RANGE[0], CLIP_RANGE[1]))
        for v in d_params
    ]

    init = tf.global_variables_initializer()

    # Create a saver.
    saver = tf.train.Saver(var_list=None, max_to_keep=5)

    sess = tf.Session()
    sess.run(init)
    sess.run(d_clip)

    # load previous variables
    if to_restore:
        print("reloading variables...")
        logging.debug("reloading variables...")
        ckpt = tf.train.get_checkpoint_state(output_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    if not os.path.exists(output_path):
        os.mkdir(output_path)

    get_data = dataset.DataProvider(pkl_path='./bdwm_data_token.pkl',
                                    buckets_size=buckets,
                                    batch_size=batch_size)
    translator = Translator('./dict.txt')

    print("save ckpt")
    saver.save(sess, os.path.join(output_path, 'refine_model.ckpt'),
               global_step=global_step)

    for i in range(sess.run(global_step), max_epoch):
        data_iterator = get_data.get_batch()
        # train the critic more aggressively early on and periodically
        if i < 25 or i % 500 == 0:
            citers = 100
        else:
            citers = CRITIC

        for j in np.arange(citers):
            print("epoch:%s, dis iter:%s" % (i, j))
            logging.debug("epoch:%s, dis iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            _, dis_loss, fake_value, true_value = sess.run(
                [d_trainer, d_loss, d_loss_fake, d_loss_real],
                feed_dict=feed_dict)
            sess.run(d_clip)
            print("d_loss:{}".format(dis_loss))
            print("fake:{} true:{}".format(fake_value, true_value))
            logging.debug("d_loss:{}".format(dis_loss))
            logging.debug("fake:{} true:{}".format(fake_value, true_value))

        for j in np.arange(gen_critic):
            print("epoch:%s, gen iter:%s" % (i, j))
            logging.debug("epoch:%s, gen iter:%s" % (i, j))
            try:
                feed_dict, BUCKET_ID = data_iterator.next()
            except StopIteration:
                get_data = dataset.DataProvider(
                    pkl_path='./bdwm_data_token.pkl',
                    buckets_size=buckets,
                    batch_size=batch_size)
                data_iterator = get_data.get_batch()
                feed_dict, BUCKET_ID = data_iterator.next()
            g_loss_val, _, d_loss_val = sess.run([g_loss, g_trainer, d_loss],
                                                 feed_dict=feed_dict)
            logging.debug("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))
            print("g_loss:{} d_loss:{}".format(g_loss_val, d_loss_val))

        # get generator output for the current bucket
        gen_val = sess.run([fake_ans], feed_dict=feed_dict)
        print(gen_val)
        #translator.translate_and_print(seq2seq_onehot2label(gen_val), logger=logging)

        print("save ckpt")
        logging.debug("save ckpt")
        saver.save(sess, os.path.join(output_path, 'model.ckpt'),
                   global_step=global_step)