def test_builtin_function_name_completion(completer):
    result = result_set(completer, 'SELECT MAX')
    assert result == {
        function('MAX', -3),
        function('MAX_VALID_TIMESTAMP()', -3),
        keyword('MAXEXTENTS', -3)
    }
def __init__(self, epsilon=1e-2, shape=()):
    self._sum = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(0.0),
        name="runningsum", trainable=False)
    self._sumsq = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(epsilon),
        name="runningsumsq", trainable=False)
    self._count = tf.get_variable(
        dtype=tf.float64,
        shape=(),
        initializer=tf.constant_initializer(epsilon),
        name="count", trainable=False)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    self.std = tf.sqrt(tf.maximum(
        tf.to_float(self._sumsq / self._count) - tf.square(self.mean),
        1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    self.incfiltparams = U.function(
        [newsum, newsumsq, newcount], [],
        updates=[tf.assign_add(self._sum, newsum),
                 tf.assign_add(self._sumsq, newsumsq),
                 tf.assign_add(self._count, newcount)])
    self.debug = U.function([], [self.mean, self.std])
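# A minimal NumPy sketch of the same running-statistics math, for reference.
# The TF class above accumulates per-batch sums via incfiltparams; the class
# and method names below (RunningStats, update) are illustrative, not from
# the source.
import numpy as np

class RunningStats(object):
    def __init__(self, epsilon=1e-2, shape=()):
        self.sum = np.zeros(shape, dtype=np.float64)
        self.sumsq = np.full(shape, epsilon, dtype=np.float64)
        self.count = epsilon

    def update(self, batch):
        # Mirrors incfiltparams: add the batch sum, sum of squares, and count.
        self.sum += batch.sum(axis=0)
        self.sumsq += np.square(batch).sum(axis=0)
        self.count += batch.shape[0]

    @property
    def mean(self):
        return self.sum / self.count

    @property
    def std(self):
        # Same clamping as the TF graph: variance floored at 1e-2.
        return np.sqrt(np.maximum(
            self.sumsq / self.count - np.square(self.mean), 1e-2))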
def test_user_function_name_completion(completer):
    result = result_set(completer, 'SELECT cu')
    assert result == {
        function('Custom_Fun', -2),
        function('Custom_Func1', -2),
        function('custom_func2', -2),
        function('CURRENT_TIMESTAMP()', -2)
    }
def __init__(self, meta_batch_size, obs_dim, encoder_units, decoder_units,
             vocab_size):
    self.meta_batch_size = meta_batch_size
    self.obs_dim = obs_dim
    self.action_dim = vocab_size
    self.core_policy = Seq2SeqPolicy(obs_dim, encoder_units, decoder_units,
                                     vocab_size, name='core_policy')

    self.meta_policies = []
    self.assign_old_eq_new_tasks = []
    for i in range(meta_batch_size):
        self.meta_policies.append(
            Seq2SeqPolicy(obs_dim, encoder_units, decoder_units, vocab_size,
                          name="task_" + str(i) + "_policy"))
        self.assign_old_eq_new_tasks.append(
            U.function([], [], updates=[
                tf.compat.v1.assign(oldv, newv)
                for (oldv, newv) in zipsame(
                    self.meta_policies[i].get_variables(),
                    self.core_policy.get_variables())
            ]))

    self._dist = CategoricalPd(vocab_size)
def minimize(self, step):
    grad_x, grad_y = grad_func(self.x, self.y)
    # First-moment (momentum) estimates
    self.grad_first_x = self.beta1 * self.grad_first_x + (1.0 - self.beta1) * grad_x
    self.grad_first_y = self.beta1 * self.grad_first_y + (1.0 - self.beta1) * grad_y
    # Second-moment (uncentered variance) estimates
    self.grad_second_x = (
        self.beta2 * self.grad_second_x + (1.0 - self.beta2) * grad_x ** 2
    )
    self.grad_second_y = (
        self.beta2 * self.grad_second_y + (1.0 - self.beta2) * grad_y ** 2
    )
    # Bias correction
    self.grad_first_x_unbiased = self.grad_first_x / (1.0 - self.beta1 ** step)
    self.grad_first_y_unbiased = self.grad_first_y / (1.0 - self.beta1 ** step)
    self.grad_second_x_unbiased = self.grad_second_x / (1.0 - self.beta2 ** step)
    self.grad_second_y_unbiased = self.grad_second_y / (1.0 - self.beta2 ** step)
    # Weight update: the step is scaled by the learning rate, not by the
    # weight itself (the original multiplied by self.x / self.y here)
    self.x = self.x - self.lr * self.grad_first_x_unbiased / (
        np.sqrt(self.grad_second_x_unbiased) + self.eps
    )
    self.y = self.y - self.lr * self.grad_first_y_unbiased / (
        np.sqrt(self.grad_second_y_unbiased) + self.eps
    )
    # For visualization purposes
    z = function(self.x, self.y)
    self.x_hist.append(self.x)
    self.y_hist.append(self.y)
    self.z_hist.append(z)
def minimize(self):
    grad_x, grad_y = grad_func(self.x, self.y)
    self.x = self.x - self.lr * grad_x
    self.y = self.y - self.lr * grad_y
    # For visualization purposes
    z = function(self.x, self.y)
    self.x_hist.append(self.x)
    self.y_hist.append(self.y)
    self.z_hist.append(z)
def __init__(self, x, y, params):
    # Initial values; the message now matches the actual bounds checked
    assert -3 <= x <= 1.5, "x must be in range [-3, 1.5]"
    self.x = x
    self.y = y
    self.lr = params["lr"]
    self.x_hist = [x]
    self.y_hist = [y]
    self.z_hist = [function(x, y)]
def __init__(self, x, y, params):
    self.x = x
    self.y = y
    self.grad_sqr_x = 0
    self.grad_sqr_y = 0
    self.lr = params["lr"]
    self.eps = params["eps"]
    self.x_hist = [x]
    self.y_hist = [y]
    self.z_hist = [function(x, y)]
def __init__(self, x, y, params):
    assert -3 <= x <= 1.5, "x must be in range [-3, 1.5]"
    self.x = x
    self.y = y
    self.v_x = 0
    self.v_y = 0
    self.lr = params["lr"]
    self.gamma = params["gamma"]
    self.x_hist = [x]
    self.y_hist = [y]
    self.z_hist = [function(x, y)]
def __init__(self, x, y, params):
    assert -3 <= x <= 1.5, "x must be in range [-3, 1.5]"
    self.x = x
    self.y = y
    self.grad_sqr_x, self.grad_sqr_y, self.s_x, self.s_y = 0, 0, 0, 0
    self.lr = params["lr"]
    self.eps = params["eps"]
    self.gamma = params["gamma"]
    self.x_hist = [x]
    self.y_hist = [y]
    self.z_hist = [function(x, y)]
def minimize(self):
    grad_x, grad_y = grad_func(self.x, self.y)
    # Accumulate squared gradients (Adagrad)
    self.grad_sqr_x += np.square(grad_x)
    self.grad_sqr_y += np.square(grad_y)
    new_grad_x = self.lr * (1 / np.sqrt(self.eps + self.grad_sqr_x)) * grad_x
    new_grad_y = self.lr * (1 / np.sqrt(self.eps + self.grad_sqr_y)) * grad_y
    self.x = self.x - new_grad_x
    self.y = self.y - new_grad_y
    # For visualization purposes
    z = function(self.x, self.y)
    self.x_hist.append(self.x)
    self.y_hist.append(self.y)
    self.z_hist.append(z)
def __init__(self, x, y, params):
    self.x, self.y = x, y
    self.lr, self.beta1, self.beta2, self.eps = (
        params["lr"],
        params["beta1"],
        params["beta2"],
        params["eps"],
    )
    self.grad_first_x, self.grad_first_y, self.grad_second_x, self.grad_second_y = (
        0,
        0,
        0,
        0,
    )
    self.x_hist = [x]
    self.y_hist = [y]
    self.z_hist = [function(x, y)]
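# A minimal, self-contained sketch of how these optimizer classes are driven.
# The quadratic objective and its gradient below are illustrative stand-ins
# for the module-level function/grad_func the classes rely on; the driver
# loop is not part of the source.
import numpy as np

def function(x, y):
    return x ** 2 + 10.0 * y ** 2

def grad_func(x, y):
    return 2.0 * x, 20.0 * y

class GradientDescent(object):
    # Mirrors the __init__/minimize pair above, without the range assertion.
    def __init__(self, x, y, params):
        self.x, self.y = x, y
        self.lr = params["lr"]
        self.x_hist, self.y_hist = [x], [y]
        self.z_hist = [function(x, y)]

    def minimize(self):
        grad_x, grad_y = grad_func(self.x, self.y)
        self.x -= self.lr * grad_x
        self.y -= self.lr * grad_y
        self.x_hist.append(self.x)
        self.y_hist.append(self.y)
        self.z_hist.append(function(self.x, self.y))

opt = GradientDescent(1.0, 1.0, {"lr": 0.05})
for _ in range(100):
    opt.minimize()
print(opt.x, opt.y)  # both approach 0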
testdata = MetaData(metadata)
cased_users_col_names = ['ID', 'PARENTID', 'Email', 'First_Name', 'last_name']
cased_users2_col_names = ['UserID', 'UserName']
cased_func_names = [
    'Custom_Fun', '_custom_fun', 'Custom_Func1', 'custom_func2',
    'set_returning_func'
]
cased_tbls = ['Users', 'Orders']
cased_views = ['User_Emails', 'Functions']
casing = (['SELECT', 'PUBLIC'] + cased_func_names + cased_tbls + cased_views
          + cased_users_col_names + cased_users2_col_names)
# Lists for use in assertions
cased_funcs = [
    function(f)
    for f in ('Custom_Fun', '_custom_fun', 'Custom_Func1', 'custom_func2')
] + [function('set_returning_func')]
cased_tbls = [table(t) for t in (cased_tbls + ['"Users"', '"select"'])]
cased_rels = [view(t) for t in cased_views] + cased_funcs + cased_tbls
cased_users_cols = [column(c) for c in cased_users_col_names]
aliased_rels = [
    table(t) for t in ('users u', '"Users" U', 'orders o', '"select" s')
] + [view('user_emails ue'), view('functions f')] + [
    function(f)
    for f in ('_custom_fun() cf', 'custom_fun() cf', 'custom_func1() cf',
              'custom_func2() cf')
] + [
    function('set_returning_func(x := , y := ) srf',
             display='set_returning_func(x, y) srf')
]
cased_aliased_rels = [
def __init__(self, **option):
    # source and target embedding dim
    sedim, tedim = option["embdim"]
    # source, target and attention hidden dim
    shdim, thdim, ahdim = option["hidden"]
    # maxout hidden dim
    maxdim = option["maxhid"]
    # maxout part
    maxpart = option["maxpart"]
    # deepout hidden dim
    deephid = option["deephid"]

    svocab, tvocab = option["vocabulary"]
    sw2id, sid2w = svocab
    tw2id, tid2w = tvocab
    # source and target vocabulary size
    svsize, tvsize = len(sid2w), len(tid2w)

    if "scope" not in option or option["scope"] is None:
        option["scope"] = "rnnsearch"
    if "initializer" not in option:
        option["initializer"] = None
    if "regularizer" not in option:
        option["regularizer"] = None

    dtype = tf.float32
    scope = option["scope"]
    initializer = option["initializer"]

    # training graph
    with tf.variable_scope(scope, initializer=initializer, dtype=dtype):
        src_seq = tf.placeholder(tf.int32, [None, None], "source_sequence")
        src_len = tf.placeholder(tf.int32, [None], "source_length")
        tgt_seq = tf.placeholder(tf.int32, [None, None], "target_sequence")
        tgt_len = tf.placeholder(tf.int32, [None], "target_length")

        with tf.variable_scope("source_embedding"):
            # place embedding and gather on CPU if you want to save memory
            # (a little bit slower)
            source_embedding = tf.get_variable("embedding", [svsize, sedim],
                                               dtype)
            source_bias = tf.get_variable("bias", [sedim], dtype)

        with tf.variable_scope("target_embedding"):
            target_embedding = tf.get_variable("embedding", [tvsize, tedim],
                                               dtype)
            target_bias = tf.get_variable("bias", [tedim], dtype)

        source_inputs = tf.gather(source_embedding, src_seq)
        target_inputs = tf.gather(target_embedding, tgt_seq)
        source_inputs = source_inputs + source_bias
        target_inputs = target_inputs + target_bias

        # run encoder
        cell = gru_cell(shdim)
        outputs = encoder(cell, source_inputs, src_len, dtype)
        encoder_outputs, encoder_output_states = outputs

        # compute initial state for decoder
        annotation = tf.concat(encoder_outputs, 2)
        final_state = encoder_output_states[-1]

        with tf.variable_scope("decoder"):
            initial_state = tf.tanh(linear(final_state, thdim, True,
                                           scope="initial"))

        # run decoder
        decoder_outputs = decoder(cell, target_inputs, initial_state,
                                  annotation, src_len, tgt_len, ahdim)
        all_output, all_context, final_state = decoder_outputs

        # compute costs
        batch = tf.shape(tgt_seq)[1]
        zero_embedding = tf.zeros([1, batch, tedim], dtype=dtype)
        shift_inputs = tf.concat([zero_embedding, target_inputs], 0)
        shift_inputs = shift_inputs[:-1, :, :]
        all_states = tf.concat([tf.expand_dims(initial_state, 0), all_output], 0)
        prev_states = all_states[:-1]
        shift_inputs = tf.reshape(shift_inputs, [-1, tedim])
        prev_states = tf.reshape(prev_states, [-1, thdim])
        all_context = tf.reshape(all_context, [-1, 2 * shdim])

        with tf.variable_scope("decoder"):
            features = [prev_states, shift_inputs, all_context]
            hidden = maxout(features, maxdim, maxpart, True)
            readout = linear(hidden, deephid, False, scope="deepout")
            logits = linear(readout, tvsize, True, scope="logits")

        labels = tf.reshape(tgt_seq, [-1])
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=labels)
        ce = tf.reshape(ce, tf.shape(tgt_seq))
        mask = tf.sequence_mask(tgt_len, dtype=dtype)
        mask = tf.transpose(mask)
        cost = tf.reduce_mean(tf.reduce_sum(ce * mask, 0))

    training_inputs = [src_seq, src_len, tgt_seq, tgt_len]
    training_outputs = [cost]
    evaluate = function(training_inputs, training_outputs)

    # encoding
    encoding_inputs = [src_seq, src_len]
    encoding_outputs = [annotation, initial_state]
    encode = function(encoding_inputs, encoding_outputs)

    # decoding graph
    with tf.variable_scope(scope or "rnnsearch", reuse=True):
        prev_words = tf.placeholder(tf.int32, [None], "prev_token")

        with tf.variable_scope("target_embedding"):
            target_embedding = tf.get_variable("embedding", [tvsize, tedim],
                                               dtype)
            target_bias = tf.get_variable("bias", [tedim], dtype)

        target_inputs = tf.gather(target_embedding, prev_words)
        target_inputs = target_inputs + target_bias

        # zeros out embedding if y is 0
        cond = tf.equal(prev_words, 0)
        cond = tf.cast(cond, dtype)
        target_inputs = target_inputs * (1.0 - tf.expand_dims(cond, 1))

        max_len = tf.shape(annotation)[0]
        attention_mask = tf.sequence_mask(src_len, max_len, dtype=dtype)
        attention_mask = tf.transpose(attention_mask)

        with tf.variable_scope("decoder"):
            mapped_states = map_attention_states(annotation, ahdim)
            alpha = attention(initial_state, mapped_states, ahdim,
                              attention_mask)
            context = tf.reduce_sum(alpha * annotation, 0)
            output, next_state = cell([target_inputs, context], initial_state)

            features = [initial_state, target_inputs, context]
            hidden = maxout(features, maxdim, maxpart, True)
            readout = linear(hidden, deephid, False, scope="deepout")
            logits = linear(readout, tvsize, True, scope="logits")
            probs = tf.nn.softmax(logits)

    precomputation_inputs = [annotation]
    precomputation_outputs = [mapped_states]
    precompute = function(precomputation_inputs, precomputation_outputs)

    alignment_inputs = [initial_state, annotation, mapped_states, src_len]
    alignment_outputs = [alpha, context]
    align = function(alignment_inputs, alignment_outputs)

    prediction_inputs = [prev_words, initial_state, context]
    prediction_outputs = [probs]
    predict = function(prediction_inputs, prediction_outputs)

    generation_inputs = [prev_words, initial_state, context]
    generation_outputs = [next_state]
    generate = function(generation_inputs, generation_outputs)

    self.cost = cost
    self.inputs = training_inputs
    self.outputs = training_outputs
    self.align = align
    self.encode = encode
    self.predict = predict
    self.generate = generate
    self.evaluate = evaluate
    self.precompute = precompute
    self.option = option
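# A hedged sketch of how the exported callables compose at inference time.
# greedy_decode is illustrative, not part of the source; it assumes each
# compiled function returns its outputs as a list in the order built above,
# batch-major dimension 1 (the graph is time-major), token id 0 as the
# zeroed start embedding, and eos_id as an assumed end-of-sequence id.
import numpy as np

def greedy_decode(model, src_seq, src_len, max_steps=100, eos_id=2):
    # Encode once, then precompute the mapped attention states.
    annotation, state = model.encode(src_seq, src_len)
    mapped_states = model.precompute(annotation)[0]
    prev_word = np.zeros([src_seq.shape[1]], dtype=np.int32)  # id 0 => zero embedding
    result = []
    for _ in range(max_steps):
        alpha, context = model.align(state, annotation, mapped_states, src_len)
        probs = model.predict(prev_word, state, context)[0]
        next_word = np.argmax(probs, axis=1).astype(np.int32)
        state = model.generate(prev_word, state, context)[0]
        result.append(next_word)
        prev_word = next_word
        if np.all(next_word == eos_id):
            break
    return result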
def map_reduce(file_path=None):
    from parser import parse
    from lexer import lexer
    from utils import function, debug_msg, insert_in_tbl
    # build the hash table from a saved .tbl file, or by scanning the
    # complete source tree
    #global the_tbl
    the_tbl = global_tbl()
    if not file_path:
        file_path = "."
        print "No file_path specified for parsing the source code, starting from current directory"
    if os.path.isfile(file_path) and file_path.endswith(".tbl"):
        # read the file to create the_tbl
        f = open(file_path)
        entry = None
        func = None
        for line in f:
            if line.startswith("&#?filename:"):
                # acquire the global entry table lock and add the entry
                # for the previous file
                if entry:
                    the_tbl.lock.acquire()
                    the_tbl.tbl[entry.get_file_name()] = entry
                    the_tbl.lock.release()
                filename_start = line.find(':')
                file_name = line[filename_start+1:]
                print "File name is: ", file_name
                entry = global_tbl_entry()
                entry.set_file_name(file_name)
            elif line.startswith("\t&#?func_name:"):
                # &#?func_name:func&#?func_args:int a&#?macro_name:MSG&#?message:I am Nandan&#?msg_args:
                func = function()
                function_name_start = line.find(':')
                function_name_end = line.index("&#?func_args:")
                func_name = line[function_name_start+1:function_name_end]
                print "Func name: ", func_name
                # "&#?func_args:" is 13 chars, so the args start right here
                # (the original skipped the first character of the args)
                function_args_start = function_name_end + 13
                func_args_end = line.index("&#?macro_name:")
                func_args = line[function_args_start:func_args_end]
                func.set_name(func_name)
                func.set_args(func_args)
                print "Func args: ", func_args
                # message
                dbg_msg = debug_msg()
                dbg_macro_start = line.find("&#?macro_name:") + 14
                dbg_msg_macro_end = line.index("&#?message:")
                macro_name = line[dbg_macro_start:dbg_msg_macro_end]
                print "Macro name: ", macro_name
                dbg_msg.set_macro_name(macro_name)
                dbg_msg_start = dbg_msg_macro_end + 11
                dbg_msg_end = line.index("&#?msg_args:")
                msg = line[dbg_msg_start:dbg_msg_end]
                print "message: ", msg
                dbg_msg.set_message(msg)
                dbg_msg_args_start = dbg_msg_end + 12
                args = line[dbg_msg_args_start:]
                dbg_msg.set_args(args)
                print "msg args: ", args
                insert_in_tbl(entry, dbg_msg, func)
        # insert the last entry
        if entry:
            the_tbl.lock.acquire()
            the_tbl.tbl[entry.get_file_name()] = entry
            the_tbl.lock.release()
        return the_tbl.tbl
    # else scan/parse the directory to make the_tbl
    print file_path
    # create the table and write it out
    the_tbl = make_tbl(file_path)
    save_tbl_in_file(the_tbl.tbl)
    return the_tbl
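# A self-contained sketch of the "&#?" record format handled above, split
# with str.index/str.find the same way map_reduce does. parse_func_record
# and the sample line are illustrative, not taken from a real .tbl file;
# using len() of each marker avoids off-by-one slicing.
def parse_func_record(line):
    fields = {}
    name_end = line.index("&#?func_args:")
    fields["func_name"] = line[line.find(':') + 1:name_end]
    args_end = line.index("&#?macro_name:")
    fields["func_args"] = line[name_end + len("&#?func_args:"):args_end]
    msg_end = line.index("&#?message:")
    fields["macro_name"] = line[args_end + len("&#?macro_name:"):msg_end]
    msg_args_start = line.index("&#?msg_args:")
    fields["message"] = line[msg_end + len("&#?message:"):msg_args_start]
    fields["msg_args"] = line[msg_args_start + len("&#?msg_args:"):]
    return fields

sample = "\t&#?func_name:func&#?func_args:int a&#?macro_name:MSG&#?message:I am Nandan&#?msg_args:"
assert parse_func_record(sample)["func_name"] == "func"
assert parse_func_record(sample)["func_args"] == "int a"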
testdata = MetaData(metadata)
cased_users_col_names = ['ID', 'PARENTID', 'Email', 'First_Name', 'last_name']
cased_users2_col_names = ['UserID', 'UserName']
cased_func_names = [
    'Custom_Fun', '_custom_fun', 'Custom_Func1', 'custom_func2',
    'set_returning_func'
]
cased_tbls = ['Users', 'Orders']
cased_views = ['User_Emails', 'Functions']
casing = (
    ['SELECT', 'PUBLIC'] + cased_func_names + cased_tbls + cased_views
    + cased_users_col_names + cased_users2_col_names
)
# Lists for use in assertions
cased_funcs = [
    function(f)
    for f in ('Custom_Fun', '_custom_fun', 'Custom_Func1', 'custom_func2')
] + [function('set_returning_func')]
cased_tbls = [table(t) for t in (cased_tbls + ['"Users"', '"select"'])]
cased_rels = [view(t) for t in cased_views] + cased_funcs + cased_tbls
cased_users_cols = [column(c) for c in cased_users_col_names]
aliased_rels = [
    table(t) for t in ('users u', '"Users" U', 'orders o', '"select" s')
] + [view('user_emails ue'), view('functions f')] + [
    function(f) for f in (
        '_custom_fun() cf', 'custom_fun() cf', 'custom_func1() cf',
        'custom_func2() cf'
    )
] + [function(
    'set_returning_func(x := , y := ) srf',
    display='set_returning_func(x, y) srf'
)]
def parse_file(self):
    from lexer import lexer, token
    from parser import parser
    from utils import function, debug_msg, insert_in_tbl

    filename = self.filename
    entry = global_tbl_entry()
    print "parser: Lexing on file:", self.filename
    body = file(self.filename, 'rt').read()
    print 'parser: ORIGINAL:', repr(body)
    print
    print
    print 'parser: -------------TOKENS:------------------'
    lexer = lexer(body)
    parser = parser()
    func_name = None
    curly_brace = 0
    small_brace = 0
    args = ""
    for token in lexer:
        # first find a function name, store the id, and look ahead:
        # if a brace follows, move to the next state
        print "parser: parsing token: ", token.get_value(), " of type: ", token.get_type()
        if parser.get_state() == "Begin":
            print "parser: parser state Begin"
            if token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
        elif parser.get_state() == "FuncName":
            if token.get_value() == "(":
                parser.set_state("FuncArgs")
            elif token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
            else:
                parser.set_state("Begin")
        elif parser.get_state() == "FuncArgs":
            if token.get_value() == ")":
                parser.set_state("FuncBody")
            elif token.get_value() == ",":
                print "parser: Comma"
            elif token.get_type() == "Id":
                args += token.get_value()
            else:
                print "parser: found: ", token.get_value(), " while parser in state Args"
                # reset parser
                parser.set_state("Begin")
        elif parser.get_state() == "FuncBody":
            if token.get_value() == "{":
                # confirmed function, update everything
                parser.set_state("Function")
                aFunction = function()
                aFunction.set_name(func_name)
                aFunction.set_args(args)
                print "parser: ***********Found a function by name : ", func_name, " **************************"
                curly_brace += 1
                # insert function
            elif token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
            else:
                parser.set_state("Begin")
        elif parser.get_state() == "Function":
            if token.get_value() == "}":
                curly_brace -= 1
                if curly_brace == 0:
                    print "parser: ********* Finished function: ", func_name, "******************"
                    # function ends, update everything
                    parser.set_state("Begin")
                    # close messages for this func
            elif token.get_value() == "{":
                curly_brace += 1
            elif token.get_type() == "Debug":
                parser.set_state("Debug")
                dbg_msg = debug_msg()
                print "Macro Name ===================================================", token.get_value()
                dbg_msg.set_macro_name(token.get_value())
            elif token.get_type() == "Entry/Exit":
                parser.set_state("DebugEntry/Exit")
                dbg_msg = debug_msg()
                print "Macro Name ==================================================", token.get_value()
                dbg_msg.set_macro_name(token.get_value())
        elif parser.get_state() == "Debug":
            if token.get_value() == "(":
                if small_brace == 0:
                    parser.set_state("DbgMsg")
                small_brace += 1
        elif parser.get_state() == "DbgMsg":
            if token.get_type() == "Quotes":
                dbg_msg.set_message(token.get_value())
            elif token.get_value() == ")":
                small_brace -= 1
                if small_brace == 0:
                    print "parser: **** Finished one Debug message***** "
                    insert_in_tbl(entry, dbg_msg, aFunction)
                    parser.set_state("Function")
            else:
                parser.set_state("DbgMsgArgs")
        elif parser.get_state() == "DbgMsgArgs":
            if token.get_value() == ")":
                small_brace -= 1
                if small_brace == 0:
                    print "parser: **** Finished one Debug message***** "
                    insert_in_tbl(entry, dbg_msg, aFunction)
                    parser.set_state("Function")
            if token.get_value() == "(":
                small_brace += 1
            if token.get_type() in ["Id", "Quotes"]:
                dbg_msg.append_args(token.get_value())
        print "parser: ======TESTING: Token value: ", token.get_value()
        print "parser: ======TESTING: Token type: ", token.get_type()

    print "parser: ***********all tables ***********************"
    print
    print "parser: -----------Rest-------------------"
    for val in entry.rest_in_list:
        print "parser: Function: ", val.get_function().get_func_name(), \
            " Message: ", val.get_dbg_msg().get_message(), \
            " Debug Args: ", val.get_dbg_msg().get_args()
    print
    print "parser: ----------cmplt_msg_tbl--------------------"
    for hash_key in entry.cmplt_msg_tbl.keys():
        val = entry.cmplt_msg_tbl[hash_key]
        print "parser: Function: ", val.get_function().get_func_name(), \
            " Message: ", val.get_dbg_msg().get_message(), \
            " Debug Args: ", val.get_dbg_msg().get_args()
    print
    print "parser: ----------partial_msg_tbl--------------------"
    for hash_key in entry.partial_msg_tbl.keys():
        print hash_key
        val = entry.partial_msg_tbl[hash_key]
        print "parser: Function: ", val.get_function().get_func_name(), \
            " Message: ", val.get_dbg_msg().get_message(), \
            " Debug Args: ", val.get_dbg_msg().get_args()
    return entry
def test_user_function_name_completion(completer):
    result = result_set(completer, 'SELECT cu')
    assert result == {
        function('Custom_Fun', -2),
        function('Custom_Func1', -2),
        function('custom_func2', -2),
        function('CURRENT_TIMESTAMP()', -2)
    }
def test_builtin_function_name_completion(completer):
    result = result_set(completer, 'SELECT MAX')
    assert result == {
        function('MAX', -3),
        function('MAX_VALID_TIMESTAMP()', -3)
    }
def test_drop_alter_function(completer, action):
    assert get_result(completer, action + ' FUNCTION set_ret') == [
        function('set_returning_func', -len('set_ret'))
    ]
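# These completion tests lean on small helpers defined elsewhere in the
# suite (function, keyword, get_result, result_set). A plausible sketch,
# assuming prompt_toolkit-style Completion objects; the real helpers may
# differ in detail, and get_result is the suite's own completer driver.
from prompt_toolkit.completion import Completion

def function(text, pos=0, display=None):
    # A completion rendered as a function, starting `pos` characters back.
    return Completion(text, start_position=pos, display=display or text,
                      display_meta='function')

def keyword(text, pos=0):
    return Completion(text, start_position=pos, display_meta='keyword')

def result_set(completer, text):
    # Wrap get_result in a set so assertions are order-insensitive.
    return set(get_result(completer, text))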
def __init__(self, num_layers, num_heads, attention_dropout, residual_dropout,
             relu_dropout, emb_size, hidden_size, filter_size,
             l1_vocab_size, l2_vocab_size, l1_word2vec, l2_word2vec, **option):
    if "initializer" in option:
        initializer = option["initializer"]
    else:
        initializer = None

    def prediction(inputs, output_size):
        features = [inputs]
        logits = ops.nn.linear(features, output_size, True, True,
                               scope="logits")
        logits = tf.reshape(logits, [-1, output_size])
        return logits

    # training graph
    with tf.variable_scope("rnnsearch", initializer=initializer):
        l1_seq = tf.placeholder(tf.int32, [None, None], "l1_sequence")
        l1_len = tf.placeholder(tf.int32, [None], "l1_length")
        l2_seq = tf.placeholder(tf.int32, [None, None], "l2_sequence")
        l2_len = tf.placeholder(tf.int32, [None], "l2_length")

        with tf.device("/cpu:0"):
            l1_embedding = tf.get_variable("l1_embedding",
                                           initializer=l1_word2vec,
                                           dtype=tf.float32)
            l2_embedding = tf.get_variable("l2_embedding",
                                           initializer=l2_word2vec,
                                           dtype=tf.float32)

        l1_inputs = tf.gather(l1_embedding, l1_seq)  # shape=[batch, step, dim]
        l2_inputs = tf.gather(l2_embedding, l2_seq)  # shape=[batch, step, dim]

        # encoder
        l1_mask = tf.sequence_mask(l1_len, maxlen=tf.shape(l1_seq)[1],
                                   dtype=tf.float32)

        with tf.variable_scope("encoder"):
            emb_bias = tf.get_variable("emb_bias", [emb_size])
            l1_inputs = l1_inputs * (emb_size ** 0.5)
            l1_inputs = l1_inputs * tf.expand_dims(l1_mask, -1)
            l1_inputs = tf.nn.bias_add(l1_inputs, emb_bias)
            l1_inputs = add_position_embedding(l1_inputs)
            enc_attn_mask = attention_mask(l1_mask, "masking")

            if residual_dropout > 0.0:
                keep_prob = 1.0 - residual_dropout
                l1_inputs = tf.nn.dropout(l1_inputs, keep_prob)

            annotation = transformer_encoder(l1_inputs, enc_attn_mask,
                                             num_layers, num_heads,
                                             hidden_size, attention_dropout,
                                             residual_dropout, filter_size,
                                             relu_dropout)

        # decoder
        l2_mask = tf.sequence_mask(l2_len, maxlen=tf.shape(l2_seq)[1],
                                   dtype=tf.float32)
        l2_inputs = l2_inputs * (emb_size ** 0.5)
        l2_inputs = l2_inputs * tf.expand_dims(l2_mask, -1)
        dec_attn_mask = attention_mask(tf.shape(l2_seq)[1], "causal")
        # shift target inputs right by one position
        shift_inputs = tf.pad(l2_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
        shift_inputs = add_position_embedding(shift_inputs)

        if residual_dropout > 0.0:
            keep_prob = 1.0 - residual_dropout
            shift_inputs = tf.nn.dropout(shift_inputs, keep_prob)

        decoder_output = transformer_decoder(shift_inputs, annotation,
                                             dec_attn_mask, enc_attn_mask,
                                             num_layers, num_heads,
                                             hidden_size, attention_dropout,
                                             residual_dropout, filter_size,
                                             relu_dropout)

        with tf.variable_scope("decoder"):
            logits = prediction(decoder_output, l2_vocab_size)

        labels = tf.reshape(l2_seq, [-1])
        ce = ops.nn.smoothed_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, smoothing=0.1, normalize=True)
        ce = tf.reshape(ce, tf.shape(l2_seq))
        cost = tf.reduce_sum(ce * l2_mask) / tf.reduce_sum(l2_mask)

    # ******************************* Training Graph End *********************************
    train_inputs = [l1_seq, l1_len, l2_seq, l2_len]
    train_outputs = [cost]

    # ******************************** Decoding L1 -> L2 *********************************
    with tf.variable_scope("rnnsearch", reuse=True):
        partial_translation = tf.placeholder(tf.int32, [None, None],
                                             "partial_translation")
        prev_state = {
            "layer_%d" % i: {
                "key": tf.placeholder(tf.float32, [None, None, hidden_size],
                                      "layer_%d_key" % i),
                "value": tf.placeholder(tf.float32, [None, None, hidden_size],
                                        "layer_%d_value" % i)
            }
            for i in range(num_layers)
        }

        with tf.device("/cpu:0"):
            l1_embedding = tf.get_variable("l1_embedding",
                                           initializer=l1_word2vec,
                                           dtype=tf.float32)
            l2_embedding = tf.get_variable("l2_embedding",
                                           initializer=l2_word2vec,
                                           dtype=tf.float32)

        l1_inputs = tf.gather(l1_embedding, l1_seq)
        l2_inputs = tf.gather(l2_embedding, partial_translation)

        cond = tf.equal(partial_translation, 0)
        cond = tf.cast(cond, tf.float32)
        l2_inputs = l2_inputs * (1.0 - tf.expand_dims(cond, -1))

        # encoder
        l1_mask = tf.sequence_mask(l1_len, maxlen=tf.shape(l1_seq)[1],
                                   dtype=tf.float32)

        with tf.variable_scope("encoder"):
            emb_bias = tf.get_variable("emb_bias", [emb_size])
            l1_inputs = l1_inputs * (emb_size ** 0.5)
            l1_inputs = l1_inputs * tf.expand_dims(l1_mask, -1)
            l1_inputs = tf.nn.bias_add(l1_inputs, emb_bias)
            l1_inputs = add_position_embedding(l1_inputs)
            enc_attn_mask = attention_mask(l1_mask, "masking")
            annotation = transformer_encoder(l1_inputs, enc_attn_mask,
                                             num_layers, num_heads,
                                             hidden_size, 0.0, 0.0,
                                             filter_size, 0.0)

        # decoder
        l2_inputs = l2_inputs * (emb_size ** 0.5)
        l2_inputs = add_position_embedding(l2_inputs)
        query = l2_inputs[:, -1:, :]
        decoder_outputs = transformer_decoder(query, annotation, None,
                                              enc_attn_mask, num_layers,
                                              num_heads, hidden_size,
                                              attention_dropout,
                                              residual_dropout, filter_size,
                                              relu_dropout, state=prev_state)
        decoder_output, decoder_state = decoder_outputs
        decoder_output = decoder_output[:, -1:, :]

        with tf.variable_scope("decoder"):
            logits = prediction(decoder_output, l2_vocab_size)

        probs = tf.nn.softmax(logits)

    encoding_inputs = [l1_seq, l1_len]
    encoding_outputs = [annotation, enc_attn_mask]
    encode = function(encoding_inputs, encoding_outputs)

    prediction_inputs = [partial_translation, prev_state, annotation,
                         enc_attn_mask]
    prediction_outputs = [probs, decoder_state]
    predict = function(prediction_inputs, prediction_outputs)

    self.cost = cost
    self.inputs = train_inputs
    self.outputs = train_outputs
    self.encode = encode
    self.predict = predict
    self.option = option
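# The prev_state placeholders above implement incremental decoding with a
# per-layer key/value cache: each step feeds only the newest token as the
# query and reuses cached keys/values for everything earlier. A
# self-contained NumPy sketch of the idea for one attention layer; names
# and shapes are illustrative, and identity key/value projections are used
# for brevity.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attend_with_cache(query, cache):
    # query: [batch, 1, dim]; cache holds the keys/values of all prior steps.
    # Appending instead of recomputing makes each step O(t) rather than O(t^2).
    cache["key"] = np.concatenate([cache["key"], query], axis=1)
    cache["value"] = np.concatenate([cache["value"], query], axis=1)
    scores = query @ cache["key"].transpose(0, 2, 1) / np.sqrt(query.shape[-1])
    return softmax(scores) @ cache["value"]

dim = 4
cache = {"key": np.zeros((1, 0, dim)), "value": np.zeros((1, 0, dim))}
for t in range(3):
    out = attend_with_cache(np.random.randn(1, 1, dim), cache)
assert cache["key"].shape == (1, 3, dim)  # cache grows one step at a time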
def __init__(self, model, **option):
    loss = model.cost
    inputs = model.inputs
    outputs = model.outputs

    if "norm" not in option:
        option["norm"] = False
    if "constraint" not in option:
        option["constraint"] = None

    params = tf.trainable_variables()
    grads = tf.gradients(loss, params, colocate_gradients_with_ops=True,
                         gate_gradients=True)

    if option["norm"]:
        normval = tf.global_norm(grads)
        outputs = outputs[:]
        outputs.append(normval)

    if option["constraint"]:
        method, value = option["constraint"]
        if method == "value":
            min_v = value[0]
            max_v = value[1]
            grads = [tf.clip_by_value(g, min_v, max_v) for g in grads]
        if method == "norm":
            grads, normval = tf.clip_by_global_norm(grads, value)

    gvars = []
    gvars_and_vars = []
    grads_and_gvars = []

    for grad, var in zip(grads, params):
        if grad is None:
            continue
        slotvar = create_zeros_slot(var, "gradient")
        gvars.append(slotvar)
        gvars_and_vars.append((slotvar, var))
        grads_and_gvars.append([grad, slotvar])

    grad_updates = gradient_updates(grads_and_gvars)

    placeholders = []

    if "algorithm" not in option:
        option["algorithm"] = "sgd"

    if option["algorithm"] == "sgd":
        varlist = []
        lr = tf.placeholder(tf.float32, [])
        defaults = [('alpha', 1.0)]
        placeholders.append(lr)
        var_updates = sgd_updates(gvars_and_vars, lr)
    elif option["algorithm"] == "rmsprop":
        lr = tf.placeholder(tf.float32, [])
        rho = tf.placeholder(tf.float32, [])
        eps = tf.placeholder(tf.float32, [])
        varlist = []
        svars = []
        for gvar in gvars:
            ms = create_zeros_slot(gvar, "mean_square")
            mg = create_zeros_slot(gvar, "mean_gradient")
            svars.append([ms, mg])
            varlist.extend([ms, mg])
        placeholders.append(lr)
        placeholders.append(rho)
        placeholders.append(eps)
        defaults = [('alpha', 1e-2), ('rho', 0.99), ('epsilon', 1e-8)]
        var_updates = rmsprop_updates(gvars_and_vars, svars, lr, rho, eps)
    elif option["algorithm"] == "adam":
        lr = tf.placeholder(tf.float32, [])
        beta1 = tf.placeholder(tf.float32, [])
        beta2 = tf.placeholder(tf.float32, [])
        eps = tf.placeholder(tf.float32, [])
        t = tf.Variable(0.0, name="adam_t", dtype=tf.float32, trainable=False)
        varlist = [t]
        svars = [t]
        for gvar in gvars:
            m = create_zeros_slot(gvar, "m")
            v = create_zeros_slot(gvar, "v")
            svars.append([m, v])
            varlist.extend([m, v])
        placeholders.append(lr)
        placeholders.append(beta1)
        placeholders.append(beta2)
        placeholders.append(eps)
        defaults = [("alpha", 1e-3), ("beta1", 0.9), ("beta2", 0.999),
                    ("epsilon", 1e-8)]
        var_updates = adam_updates(gvars_and_vars, svars, lr, beta1, beta2,
                                   eps)
    else:
        raise ValueError("unknown algorithm %s" % option["algorithm"])

    optimize = function(inputs, outputs, updates=grad_updates)
    update = function(placeholders, [], updates=var_updates)

    def wrapper(**option):
        values = []
        for item in defaults:
            name = item[0]
            val = item[1]
            if name not in option:
                option[name] = val
            values.append(option[name])
        return update(*values)

    self.optimize = optimize
    self.update = wrapper
    self.option = option
    self.variables = varlist
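# The wrapper closure above fills unspecified hyperparameters from an
# ordered defaults list before calling the compiled update function, so
# trainer.update(alpha=...) works with any subset of arguments. A
# self-contained sketch of that pattern; make_update and the lambda stub
# are illustrative, not part of the source.
def make_update(defaults, update):
    def wrapper(**option):
        values = [option.get(name, default) for name, default in defaults]
        return update(*values)
    return wrapper

step = make_update([("alpha", 1e-3), ("beta1", 0.9)],
                   lambda alpha, beta1: (alpha, beta1))
assert step() == (1e-3, 0.9)          # all defaults
assert step(alpha=0.01) == (0.01, 0.9)  # override one value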
class NMT(object):
    def __init__(self, hyparams, svocab_size, tvocab_size, eos="</s>",
                 unk="UNK", is_training=True):
        self.learning_rate = hyparams.learning_rate
        self.global_step = tf.Variable(0, trainable=False)
        keep_prob = 1.0 - hyparams.dropout_rate

        def prediction(prev_inputs, states, context, keep_prob=1.0):
            if states.get_shape().ndims == 3:
                states = tf.reshape(states, [-1, hyparams.hidden_size])
            if prev_inputs.get_shape().ndims == 3:
                prev_inputs = tf.reshape(prev_inputs, [-1, hyparams.emb_size])
            if context.get_shape().ndims == 3:
                context = tf.reshape(context, [-1, 2 * hyparams.hidden_size])
            features = [states, prev_inputs, context]
            readout = ops.nn.linear(features, hyparams.emb_size, True,
                                    multibias=True, scope="deepout")
            readout = tf.tanh(readout)
            if keep_prob < 1.0:
                readout = tf.nn.dropout(readout, keep_prob=keep_prob)
            logits = ops.nn.linear(readout, tvocab_size, True, scope="logits")
            return logits

        initializer = tf.random_uniform_initializer(hyparams.minval,
                                                    hyparams.maxval)

        # training graph
        with tf.variable_scope("rnnsearch", initializer=initializer):
            self.src_seq = tf.placeholder(tf.int32, [None, None],
                                          "source_sequence")
            self.src_len = tf.placeholder(tf.int32, [None], "source_length")
            self.tgt_seq = tf.placeholder(tf.int32, [None, None],
                                          "target_sequence")
            self.tgt_len = tf.placeholder(tf.int32, [None], "target_length")

            with tf.device("/cpu:0"):
                source_embedding = tf.get_variable(
                    "source_embedding", [svocab_size, hyparams.emb_size],
                    tf.float32)
                target_embedding = tf.get_variable(
                    "target_embedding", [tvocab_size, hyparams.emb_size],
                    tf.float32)

            source_inputs = tf.gather(source_embedding, self.src_seq)
            target_inputs = tf.gather(target_embedding, self.tgt_seq)

            if keep_prob < 1.0:
                source_inputs = tf.nn.dropout(source_inputs, keep_prob)
                target_inputs = tf.nn.dropout(target_inputs, keep_prob)

            cell = ops.rnn_cell.GRUCell(hyparams.hidden_size)
            annotation = encoder(cell, cell, source_inputs, self.src_len)

            with tf.variable_scope("decoder"):
                ctx_sum = tf.reduce_sum(annotation, 0)
                initial_state = ops.nn.linear(ctx_sum, hyparams.hidden_size,
                                              True, scope="initial")
                initial_state = tf.tanh(initial_state)

            zero_embedding = tf.zeros(
                [1, tf.shape(self.tgt_seq)[1], hyparams.emb_size])
            shift_inputs = tf.concat([zero_embedding, target_inputs], 0)
            shift_inputs = shift_inputs[:-1, :, :]
            shift_inputs.set_shape([None, None, hyparams.emb_size])

            cell = ops.rnn_cell.GRUCell(hyparams.hidden_size)
            decoder_outputs = decoder(cell, shift_inputs, initial_state,
                                      annotation, self.src_len,
                                      hyparams.attn_size)
            output, context = decoder_outputs

            with tf.variable_scope("decoder"):
                logits = prediction(shift_inputs, output, context,
                                    keep_prob=keep_prob)

            labels = tf.reshape(self.tgt_seq, [-1])
            ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)
            ce = tf.reshape(ce, tf.shape(self.tgt_seq))
            mask = tf.sequence_mask(self.tgt_len, dtype=tf.float32)
            mask = tf.transpose(mask)
            # mean over sentences of the summed token losses (the original
            # misplaced the parenthesis: reduce_sum(ce * mask), 0)
            cost = tf.reduce_mean(tf.reduce_sum(ce * mask, 0))

        self.cross_entropy_loss = cost

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.cross_entropy_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, hyparams.max_gradient_norm)
        self.updates = self.optimizer.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)
        self.saver = tf.train.Saver(tf.global_variables())

        # decoding graph; the scope name must match the training graph
        # ("rnnsearch", not "rnn_search") for reuse=True to find variables
        with tf.variable_scope("rnnsearch", reuse=True) as scope:
            prev_word = tf.placeholder(tf.int32, [None], "prev_token")

            with tf.device("/cpu:0"):
                source_embedding = tf.get_variable(
                    "source_embedding", [svocab_size, hyparams.emb_size],
                    tf.float32)
                target_embedding = tf.get_variable(
                    "target_embedding", [tvocab_size, hyparams.emb_size],
                    tf.float32)

            source_inputs = tf.gather(source_embedding, self.src_seq)
            target_inputs = tf.gather(target_embedding, prev_word)

            cond = tf.equal(prev_word, 0)
            cond = tf.cast(cond, tf.float32)
            target_inputs = target_inputs * (1.0 - tf.expand_dims(cond, 1))

            # encoder
            cell = ops.rnn_cell.GRUCell(hyparams.hidden_size)
            annotation = encoder(cell, cell, source_inputs, self.src_len)

            # decoder
            with tf.variable_scope("decoder"):
                ctx_sum = tf.reduce_sum(annotation, 0)
                initial_state = ops.nn.linear(ctx_sum, hyparams.hidden_size,
                                              True, scope="initial")
                initial_state = tf.tanh(initial_state)

            with tf.variable_scope("decoder"):
                mask = tf.sequence_mask(self.src_len,
                                        tf.shape(self.src_seq)[0],
                                        dtype=tf.float32)
                mask = tf.transpose(mask)
                mapped_states = attention(None, annotation, None, None,
                                          hyparams.attn_size)

            cell = ops.rnn_cell.GRUCell(hyparams.hidden_size)

            with tf.variable_scope("decoder"):
                with tf.variable_scope("below"):
                    output, state = cell(target_inputs, initial_state)
                alpha = attention(output, annotation, mapped_states, mask,
                                  hyparams.attn_size)
                context = tf.reduce_sum(alpha * annotation, 0)
                with tf.variable_scope("above"):
                    output, next_state = cell(context, state)
                logits = prediction(target_inputs, next_state, context)
                probs = tf.nn.softmax(logits)

        encoding_inputs = [self.src_seq, self.src_len]
        encoding_outputs = [annotation, mapped_states, initial_state, mask]
        encode = function(encoding_inputs, encoding_outputs)

        prediction_inputs = [prev_word, initial_state, annotation,
                             mapped_states, mask]
        prediction_outputs = [probs, next_state, alpha]
        predict = function(prediction_inputs, prediction_outputs)

        self.encode = encode
        self.predict = predict

    def train_step(self, sess, train_set, svocab, tvocab, unk, iteration):
        sbatch_data, sdata_length, tbatch_data, tdata_length = \
            data_utils.nextBatch(train_set, svocab, tvocab, unk,
                                 iteration * self.batch_size, self.batch_size)
        _, step_loss = sess.run(
            [self.updates, self.cross_entropy_loss],
            feed_dict={
                self.src_seq: sbatch_data,
                self.src_len: sdata_length,
                self.tgt_seq: tbatch_data,
                self.tgt_len: tdata_length
            })
        # cost is a per-sentence mean; rescale to a per-token average
        return step_loss * len(tdata_length) / sum(tdata_length)
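# A quick, self-contained check of the rescaling in train_step: the graph's
# cost is a mean over sentences of summed token losses, so multiplying by
# the batch size and dividing by the total token count yields the average
# loss per token. The numbers below are made up for illustration.
sentence_token_losses = [[0.5, 0.7, 0.3], [0.2, 0.4]]
tdata_length = [3, 2]
step_loss = sum(sum(s) for s in sentence_token_losses) / len(tdata_length)
per_token = step_loss * len(tdata_length) / sum(tdata_length)
assert abs(per_token - 2.1 / 5) < 1e-12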