def get_dynamic_rebar_gradient(self):
    """Get the dynamic rebar gradient (t, eta optimized)."""
    tiled_pre_temperature = tf.tile([self.pre_temperature_variable],
                                    [self.batch_size])
    temperature = tf.exp(tiled_pre_temperature)

    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
        gumbel_cv, extra = self._create_gumbel_control_variate_quadratic(
            logQHard, temperature=temperature)
    else:
        gumbel_cv, extra = self._create_gumbel_control_variate(
            logQHard, temperature=temperature)

    f_grads = self.optimizer_class.compute_gradients(
        tf.reduce_mean(-nvil_gradient))

    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    g = U.vectorize(model_grads, set_none_to_zero=True)
    self.maintain_ema_ops.append(self.ema.apply([g]))
    gbar = 0  # tf.stop_gradient(self.ema.average(g))
    variance_objective = tf.reduce_mean(tf.square(g - gbar))

    reinf_g_t = 0
    if self.hparams.quadratic:
        for layer in range(self.hparams.n_layer):
            gumbel_learning_signal, _ = extra[layer]
            df_dt = tf.gradients(gumbel_learning_signal,
                                 tiled_pre_temperature)[0]
            reinf_g_t_i, _ = self.multiply_by_eta_per_layer(
                self.optimizer_class.compute_gradients(
                    tf.reduce_mean(tf.stop_gradient(df_dt) * logQHard[layer])),
                eta)
            reinf_g_t += U.vectorize(reinf_g_t_i, set_none_to_zero=True)

        reparam = tf.add_n([reparam_i for _, reparam_i in extra])
    else:
        gumbel_learning_signal, reparam = extra
        df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
        reinf_g_t, _ = self.multiply_by_eta_per_layer(
            self.optimizer_class.compute_gradients(
                tf.reduce_mean(tf.stop_gradient(df_dt) * tf.add_n(logQHard))),
            eta)
        reinf_g_t = U.vectorize(reinf_g_t, set_none_to_zero=True)

    reparam_g, _ = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(reparam)),
        eta)
    reparam_g = U.vectorize(reparam_g, set_none_to_zero=True)
    reparam_g_t = tf.gradients(
        tf.reduce_mean(2 * tf.stop_gradient(g - gbar) * reparam_g),
        self.pre_temperature_variable)[0]

    variance_objective_grad = (tf.reduce_mean(2 * (g - gbar) * reinf_g_t)
                               + reparam_g_t)

    debug = {
        'ELBO': hardELBO,
        'etas': eta_statistics,
        'variance_objective': variance_objective,
    }
    return total_grads, debug, variance_objective, variance_objective_grad

def make_data(data, wordvec_file, ans_file=None, base_dir='./rawdata'):
    if ans_file:
        ans_data = pd.read_csv(base_dir + ans_file, sep='\t').set_index('q_id')
        answer_map = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}
        ans_data['answer'] = ans_data['answer'].apply(lambda x: answer_map[x])
    wordvec = pd.read_csv(base_dir + wordvec_file, sep=' ',
                          header=None, index_col=0)
    vectors = {}
    for qid, data_dict in data.items():
        vectors[qid] = {}
        ques_vec = vectorize(data_dict['question'], wordvec)
        capt_vec = vectorize(data_dict['caption'], wordvec)
        context = np.concatenate((ques_vec, capt_vec))
        X = []
        for choice in data_dict['choices']:
            choi_vec = vectorize(choice, wordvec)
            x = np.concatenate((context, choi_vec))
            X.append(x)
        vectors[qid]['X'] = np.array(X)
        if ans_file:
            ans = ans_data.loc[qid]['answer']
            y = [([0, 1] if i == ans else [1, 0]) for i in range(5)]
            vectors[qid]['y'] = np.array(y)
    return vectors

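# A minimal sketch of the text-level vectorize() helper used above, assuming
# whitespace tokenization and averaging of the pretrained word vectors
# (hypothetical -- the real helper may sum, pad, or handle OOV differently;
# vectorize_sentence is an illustrative name, not the original function):
import numpy as np

def vectorize_sentence(sentence, wordvec):
    # Look up each token in the word-vector DataFrame and average;
    # tokens missing from the table are skipped.
    vecs = [wordvec.loc[w].values for w in sentence.split()
            if w in wordvec.index]
    if not vecs:
        return np.zeros(wordvec.shape[1])
    return np.mean(vecs, axis=0)
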
def init():
    path = config.data_path
    config.embedding_file = os.path.join(path, config.embedding_file)
    config.embedding_vocab = os.path.join(path, config.embedding_vocab)
    config.train_file = os.path.join(path, config.train_file)
    config.test_file = os.path.join(path, config.test_file)

    # Config log
    if config.log_file is None:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')
    else:
        if not os.path.exists(config.save_path):
            os.makedirs(config.save_path)
        logging.basicConfig(filename=config.log_file,
                            filemode='a',
                            level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')

    # Load data
    # data = (sentences, relations, e1_pos, e2_pos)
    train_data = utils.load_data(config.train_file)
    test_data = utils.load_data(config.test_file)

    logging.info('train data: %d' % len(train_data[0]))
    logging.info('test data: %d' % len(test_data[0]))

    # Build vocab
    word_dict = utils.build_dict(train_data[0] + test_data[0])
    logging.info('total words: %d' % len(word_dict))

    embeddings = utils.load_embedding(config, word_dict)

    # Log parameters
    flags = config.__dict__['__flags']
    flag_str = "\n"
    for k in flags:
        flag_str += "\t%s:\t%s\n" % (k, flags[k])
    logging.info(flag_str)

    # vectorize data
    # vec = (sents_vec, relations, e1_vec, e2_vec, dist1, dist2)
    max_len_train = len(max(train_data[0], key=lambda x: len(x)))
    max_len_test = len(max(test_data[0], key=lambda x: len(x)))
    max_len = max(max_len_train, max_len_test)
    config.max_len = max_len

    train_vec = utils.vectorize(train_data, word_dict, max_len)
    test_vec = utils.vectorize(test_data, word_dict, max_len)

    return embeddings, train_vec, test_vec

def stopRecording(self, event):
    '''
    Stop recording frames, and convert the frame buffer to a vector of
    the matching length depending on the sign type ('gesture' or 'fixed')
    '''
    if self.isRecording:
        value = self.currentSign
        # Stop the recording thread
        self.isRecording = False
        if self.currentSign.type == 'Fixed' and len(self.vectorBuffer):
            self.currentSign.samples.append(
                utils.vectorize(self.vectorBuffer, fixed=True))
        # Make sure we have at least 10 frames to create the gesture
        elif len(self.vectorBuffer) >= 10:
            self.currentSign.samples.append(self.vectorBuffer)
        else:
            tkMessageBox.showwarning(
                '', 'Too few frames recorded. Try again')
            return
        if len(self.currentSign.samples) >= 2:
            if not utils.validateFeatures(self.currentSign.samples):
                tkMessageBox.showwarning(
                    '',
                    'Zero values recorded from the sensor. Delete the '
                    'samples for the current sign and try again')
        pickle.dump(self.dataset, open('dataset.p', 'wb'))
        self.updateSamplesList()

def eval_acc(data):
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(data, word_dict,
                                                   entity_dict)
    all_dev = qa_model.gen_examples(dev_x1, dev_x2, dev_l, dev_y,
                                    args.batch_size)
    dev_acc = qa_model.eval_acc(test_fn, all_dev)
    return dev_acc

def compute_gradient_moments(self, grads_and_vars):
    first_moment = U.vectorize(grads_and_vars, set_none_to_zero=True)
    second_moment = tf.square(first_moment)
    self.maintain_ema_ops.append(
        self.ema.apply([first_moment, second_moment]))
    return self.ema.average(first_moment), self.ema.average(second_moment)

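# The two EMA moments above feed the standard per-parameter variance
# identity Var[g] = E[g^2] - (E[g])^2, which is how _create_train_op
# consumes them further down. A minimal NumPy sketch of that identity
# (illustrative only; the TF code operates on EMA-smoothed tensors):
import numpy as np

def ema_variance(first_moment, second_moment):
    # Element-wise variance estimate from running first/second moments.
    return second_moment - np.square(first_moment)
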
def preprocess(self, train, test):
    if self.model_type == "ML":
        if self.vectorizer == 'tf-igm':
            X, Y, test_X, test_Y, num_words = tf_igm_vectorizer(
                train, test, stopwords_file=self.stopwords_file)
        else:
            X, Y, test_X, test_Y, num_words = vectorize(
                train, test, vectorizer=self.vectorizer,
                stopwords_file=self.stopwords_file)
        Y = np.argmax(Y, axis=1)
        test_Y = np.argmax(test_Y, axis=1)
    else:
        if self.vectorizer == 'tf-igm':
            X, Y, test_X, test_Y, num_words = tf_igm_vectorizer(
                train, test, stopwords_file=self.stopwords_file)
        else:
            X, Y, test_X, test_Y, num_words = tokenize(
                train, test, stopwords_file=self.stopwords_file,
                maxlen=self.maxlen)
    return X, Y, test_X, test_Y, num_words

def get_rebar_gradient(self):
    """Get the rebar gradient."""
    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
        gumbel_cv, _ = self._create_gumbel_control_variate_quadratic(logQHard)
    else:
        gumbel_cv, _ = self._create_gumbel_control_variate(logQHard)

    f_grads = self.optimizer_class.compute_gradients(
        tf.reduce_mean(-nvil_gradient))

    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    variance_objective = tf.reduce_mean(
        tf.square(U.vectorize(model_grads, set_none_to_zero=True)))

    debug = {
        'ELBO': hardELBO,
        'etas': eta_statistics,
        'variance_objective': variance_objective,
    }
    return total_grads, debug, variance_objective

def test(model, data, w2i, batch_size, task_id):
    print("start test---")
    model.eval()
    correct = 0
    count = 0
    for i in range(0, len(data) - batch_size, batch_size):
        batch_data = data[i:i + batch_size]
        story = [d[0] for d in batch_data]
        q = [d[1] for d in batch_data]
        a = [d[2][0] for d in batch_data]

        story_len = min(max_story_len, max([len(s) for s in story]))
        s_sent_len = max([len(sent) for s in story for sent in s])
        q_sent_len = max([len(sent) for sent in q])

        vec_data = vectorize(batch_data, w2i, story_len, s_sent_len,
                             q_sent_len)
        story = [d[0] for d in vec_data]
        q = [d[1] for d in vec_data]
        a = [d[2][0] for d in vec_data]

        story = to_var(torch.LongTensor(story))
        q = to_var(torch.LongTensor(q))
        a = to_var(torch.LongTensor(a))

        pred = model(story, q)
        pred_idx = pred.max(1)[1]
        correct += torch.sum(pred_idx == a).item()
        count += batch_size

    acc = (float(correct) / count) * 100
    print('correct:', correct)
    print('Task {} Test Acc: {} % - '.format(task_id, acc),
          correct, '/', count)
    return acc

def stopRecording(self, event):
    '''
    Stop ongoing recording of frames, and based on sign type (fixed or
    gesture) convert buffer of frames to a vector with the corresponding
    length
    '''
    if self.isRecording:
        value = self.currentSign
        # Kill recording thread
        self.isRecording = False
        # If sign recorded was fixed/static and has at least one frame
        if self.currentSign.type == 'Fixed' and len(self.vectorBuffer):
            self.currentSign.samples.append(
                utils.vectorize(self.vectorBuffer, fixed=True))
        # Make sure we have enough frames to create 10 keyframes for
        # gesture signs
        elif len(self.vectorBuffer) >= 10:
            self.currentSign.samples.append(
                utils.vectorize(self.vectorBuffer, fixed=False))
        # Not enough frames were recorded, return before making changes
        else:
            tkMessageBox.showwarning('',
                                     'Too few frames recorded. Try again')
            return
        # Check that features across all samples are non-zero
        if len(self.currentSign.samples) >= 2:
            if not utils.validateFeatures(self.currentSign.samples):
                tkMessageBox.showwarning(
                    '',
                    'Zero-values recorded from sensor. Delete samples for '
                    'current sign and try again')
        # Just to be safe, update dataset file despite not exiting program
        pickle.dump(self.dataset, open('dataset.p', 'wb'))
        self.updateSamplesList()

def use_reinforce(env, n_episode, n_step, start_alpha, info_times=20):
    print('Start REINFORCE with', n_episode, n_step, start_alpha, info_times)
    theta1 = [0. for i in range(env.observation_dim)]
    theta2 = [0. for i in range(env.observation_dim)]
    alpha = start_alpha
    average_score = 0
    for i_episode in range(n_episode):
        observation = vectorize(env.reset())
        score = 0
        actions = []
        states = []
        rewards = []
        for i_step in range(n_step):
            prev_observation = observation.copy()
            action = apply_policy(theta1, theta2, observation)
            if i_episode == n_episode - 1:
                env.render()
            observation, reward, done, info = env.step(action)
            observation = vectorize(observation)
            states.append(prev_observation)
            actions.append(action)
            rewards.append(reward)
            score += reward
            # env.render()
            if done:
                break
        alpha = update_alpha(start_alpha, i_episode)
        average_score += score
        if i_episode and i_episode % (n_episode / info_times) == 0:
            average_score /= (n_episode / info_times)
            print('Progression : {0} Average Score : {1}, Alpha : {2}'.format(
                i_episode / n_episode, average_score, alpha))
            # print('Episode ended in {0} steps, score is {1}'.format(survival_time, score))
            # print(theta1)
        # alpha = update_alpha(score)
        theta1 = update_theta(theta1, alpha, states, actions, rewards)
        theta2 = update_theta(theta2, alpha, states, actions, rewards)

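# A minimal, self-contained sketch of the update_theta() step used above,
# assuming a sigmoid (Bernoulli) policy over linear features and the
# textbook REINFORCE update theta <- theta + alpha * G_t * grad log pi(a|s).
# Hypothetical: the real apply_policy/update_theta pair (with its two theta
# vectors) may parameterize the policy differently.
import math

def update_theta_sketch(theta, alpha, states, actions, rewards, gamma=0.99):
    # Discounted return-to-go for every step of the episode.
    G, returns = 0.0, []
    for r in reversed(rewards):
        G = r + gamma * G
        returns.insert(0, G)
    for s, a, G_t in zip(states, actions, returns):
        p = 1.0 / (1.0 + math.exp(-sum(t * x for t, x in zip(theta, s))))
        # For a Bernoulli(sigmoid) policy, d/dtheta log pi(a|s) = (a - p) * s.
        theta = [t + alpha * G_t * (a - p) * x for t, x in zip(theta, s)]
    return theta
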
def test_vectorize(self):
    with self.assertRaises(AssertionError):
        vectorize([])

    array = vectorize(np.array([]))
    self.assertEqual(array.shape, (0, 1))

    array = vectorize(np.zeros((2, 1)))
    self.assertEqual(array.shape, (2, 1))

    array = vectorize(np.zeros((1, 3)))
    self.assertEqual(array.shape, (3, 1))

    array = vectorize(np.zeros((3, 4, 2, 5)))
    self.assertEqual(array.shape, (120, 1))

    image = np.array([[[0.67826139, 0.29380381],
                       [0.90714982, 0.52835647],
                       [0.4215251, 0.45017551]],
                      [[0.92814219, 0.96677647],
                       [0.85304703, 0.52351845],
                       [0.19981397, 0.27417313]],
                      [[0.60659855, 0.00533165],
                       [0.10820313, 0.49978937],
                       [0.34144279, 0.94630077]]])
    expected = np.array([[0.67826139], [0.29380381], [0.90714982],
                         [0.52835647], [0.4215251], [0.45017551],
                         [0.92814219], [0.96677647], [0.85304703],
                         [0.52351845], [0.19981397], [0.27417313],
                         [0.60659855], [0.00533165], [0.10820313],
                         [0.49978937], [0.34144279], [0.94630077]])
    self.assertTrue(np.allclose(vectorize(image), expected))

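# A minimal implementation satisfying the assertions above, assuming
# vectorize() simply flattens an ndarray into a column vector and rejects
# non-array input (hypothetical sketch; the real function may validate more):
import numpy as np

def vectorize_sketch(array):
    # A plain list such as [] must raise AssertionError.
    assert isinstance(array, np.ndarray)
    # Row-major flatten to shape (N, 1); e.g. (3, 4, 2, 5) -> (120, 1).
    return array.reshape(-1, 1)
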
def simulate(self, state_node, copy_env, depth):
    if state_node.done or depth == self.max_depth or \
            (state_node.look == 'unknown' and state_node.inv == 'unknown'):
        return 0

    best_action_node = self.greedy_action_node(
        state_node, self.exploration_constant, self.bonus_constant)
    rollout_next = False

    ob, reward, done, info = copy_env.step(best_action_node.action,
                                           valid_out=False)
    next_state_text = ob + info['look'] + info['inv']
    if '*** You have won ***' in next_state_text \
            or '*** You have died ***' in next_state_text:
        score = int(next_state_text.split('you scored ')[1]
                    .split(' out of')[0])
        reward = score - state_node.score
        info['score'] = score
    self.write_buffer(state_node, best_action_node, ob, reward, done, info)

    if next_state_text in best_action_node.children_text:
        index = best_action_node.children_text.index(next_state_text)
        next_state_node = best_action_node.children[index]
        if next_state_node.N == 0:
            rollout_next = True
        next_state_node.N += 1
    else:
        if next_state_text in self.valid_action_dict.keys():
            info['valid'] = self.valid_action_dict[next_state_text]
        else:
            info['valid'] = copy_env.get_valid(ob)
            self.valid_action_dict[next_state_text] = info['valid']
        next_state_node = self.build_state(
            ob, info, reward, prev_action=best_action_node.action)
        best_action_node.children.append(next_state_node)
        best_action_node.children_text.append(next_state_node.state)
        rollout_next = True

    if rollout_next:
        R = reward + self.discount_factor * self.rollout(
            next_state_node, copy_env, depth + 1)
    else:
        R = reward + self.discount_factor * self.simulate(
            next_state_node, copy_env, depth + 1)

    state_node.N += 1
    best_action_node.N += 1

    if self.uct_type == 'MC-LAVE':
        if best_action_node.action not in self.action_embedding.keys():
            embed_vector = utils.vectorize(best_action_node.action)
            self.action_embedding[best_action_node.action] = embed_vector
        action_value = self.q_network.get_q_value(
            ob, info['look'], info['inv'], state_node.prev_action,
            info['score'], best_action_node.action)
        self.action_values[best_action_node.action].add(action_value)

    best_action_node.Rs.append(R)
    best_action_node.Q = np.sum(
        np.array(best_action_node.Rs) * utils.softmax(best_action_node.Rs,
                                                      T=10))
    return R

def test(args, word_dict, entity_dict, train_fn, test_fn, params):
    dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                   relabeling=args.relabeling)
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples,
                                                   word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    return dev_acc

def prepare_quries_answers(args):
    chat_data = utils.load_data(args.data_dir)
    chat_data = utils.filter_sentences(chat_data, args.whitelist)
    index2word, word2index = utils.build_vocab(chat_data,
                                               max_words=args.max_words)
    Limits.q_max_len, Limits.a_max_len = args.q_max_len, args.a_max_len
    Limits.q_min_len, Limits.a_min_len = args.q_min_len, args.a_min_len
    queries, answers = utils.split_data(chat_data, Limits)
    queries, answers = utils.vectorize(queries, answers, word2index,
                                       sort_by_len=True)
    return queries, answers, index2word, word2index

def greedy_action_node(self, state_node, exploration_constant, bonus_constant):
    best_value = -np.inf
    best_children = []
    for i in range(len(state_node.children)):
        child = state_node.children[i]
        child_prob = state_node.children_probs[i]
        if exploration_constant == 0:
            ucb_value = child.Q
        elif self.uct_type == 'UCT':
            ucb_value = child.Q + exploration_constant * \
                np.sqrt(np.log(state_node.N + 1) / (child.N + 1))
        elif self.uct_type == 'PUCT':
            ucb_value = child.Q + exploration_constant * child_prob * \
                np.sqrt(state_node.N + 1) / (child.N + 1)
        elif self.uct_type == 'MC-LAVE':
            if child.action in self.action_embedding.keys():
                action_e = self.action_embedding[child.action]
            else:
                action_e = utils.vectorize(child.action)
                self.action_embedding[child.action] = action_e

            actions = list(self.action_values.keys())
            if child.action in actions:
                actions.pop(actions.index(child.action))

            actions_e = []
            for a in actions:
                actions_e.append(self.action_embedding[a])

            near_act, near_idx = utils.find_near_actions(
                action_e, actions, np.array(actions_e), threshold=0.8)
            if len(near_idx) == 0:
                child.Q_hat = 0
            else:
                near_Qs = set()
                for a in near_act:
                    near_Qs.add(np.mean(list(self.action_values[a])))
                near_Qs = list(near_Qs)
                child.Q_hat = utils.softmax_value(near_Qs)

            ucb_value = child.Q \
                + exploration_constant * np.sqrt(state_node.N + 1) \
                / (child.N + 1) * child_prob \
                + bonus_constant * np.sqrt(state_node.N + 1) \
                / (child.N + 1) * child.Q_hat
        else:
            raise NotImplementedError

        if ucb_value == best_value:
            best_children.append(child)
        elif ucb_value > best_value:
            best_value = ucb_value
            best_children = [child]
    return np.random.choice(best_children)

def train(model, train_data, test_data, optimizer, loss_fn, w2i, task_id,
          batch_size, n_epoch):
    for epoch in range(n_epoch):
        model.train()
        # print('epoch', epoch)
        correct = 0
        count = 0
        random.shuffle(train_data)
        for i in range(0, len(train_data) - batch_size, batch_size):
            batch_data = train_data[i:i + batch_size]
            story = [d[0] for d in batch_data]
            story_len = min(max_story_len, max([len(s) for s in story]))
            story_sent_len = max([len(sent) for s in story for sent in s])
            question = [d[1] for d in batch_data]
            question_sent_len = max([len(sent) for sent in question])

            vec_data = vectorize(batch_data, w2i, story_len,
                                 story_sent_len, question_sent_len)
            story = [d[0] for d in vec_data]
            question = [d[1] for d in vec_data]
            answer = [d[2][0] for d in vec_data]

            story = to_var(torch.LongTensor(story))
            question = to_var(torch.LongTensor(question))
            answer = to_var(torch.LongTensor(answer))

            pred = model(story, question)
            loss = loss_fn(pred, answer)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # reset padding index weight
            for name, param in model.named_parameters():
                if param.grad is not None:
                    if 'A.' in name:
                        param.data[0] = 0

            pred_idx = pred.max(1)[1]
            correct += torch.sum(pred_idx == answer).item()
            count += batch_size
            # for p in model.parameters():
            #     torch.nn.utils.clip_grad_norm(p, 40.0)

        if epoch % 20 == 0:
            print('=======Epoch {}======='.format(epoch))
            print('Training Acc: {:.2f}% - '.format(correct / count * 100),
                  correct, '/', count)
            test(model, test_data, w2i, batch_size, task_id)

def classify(self, _buffer, fixed):
    '''
    Classify sign based on input and display message on result
    '''
    result = None
    # Classify fixed sign
    if fixed and self.nbFixed:
        result = self.nbFixed.probabilities(
            utils.vectorize(_buffer, fixed=True))
    # Classify gesture sign
    elif self.nbGesture:
        result = self.nbGesture.probabilities(
            utils.vectorize(_buffer, fixed=False))
    # No classifier is available for this sign type
    if result is None:
        return
    # Novelty detection based on results euclidean distance
    if result[0][2] < 0.85:
        self.output.set(result[0][0])
    else:
        self.output.set("not recognized...")
        sleep(3)
        self.output.set("")

def _create_train_op(self, grads_and_vars, extra_grads_and_vars=[]):
    '''
    Args:
        grads_and_vars: gradients to apply and compute running average
            variance
        extra_grads_and_vars: gradients to apply (not used to compute
            average variance)
    '''
    # Variance summaries
    first_moment = U.vectorize(grads_and_vars, skip_none=True)
    second_moment = tf.square(first_moment)
    self.maintain_ema_ops.append(
        self.ema.apply([first_moment, second_moment]))

    # Add baseline losses
    if len(self.baseline_loss) > 0:
        mean_baseline_loss = tf.reduce_mean(tf.add_n(self.baseline_loss))
        extra_grads_and_vars += self.optimizer_class.compute_gradients(
            mean_baseline_loss, var_list=tf.get_collection('BASELINE'))

    # Ensure that all required tensors are computed before updates are
    # executed
    extra_optimizer = tf.train.AdamOptimizer(
        learning_rate=10 * self.hparams.learning_rate,
        beta2=self.hparams.beta2)
    with tf.control_dependencies([
            tf.group(*[g for g, _ in (grads_and_vars + extra_grads_and_vars)
                       if g is not None])]):
        # Filter out the P_COLLECTION variables if we're in eval mode
        if self.eval_mode:
            grads_and_vars = [(g, v) for g, v in grads_and_vars
                              if v not in tf.get_collection(P_COLLECTION)]
        train_op = self.optimizer_class.apply_gradients(
            grads_and_vars, global_step=self.global_step)

        if len(extra_grads_and_vars) > 0:
            extra_train_op = extra_optimizer.apply_gradients(
                extra_grads_and_vars)
        else:
            extra_train_op = tf.no_op()

        self.optimizer = tf.group(train_op, extra_train_op,
                                  *self.maintain_ema_ops)

    # per parameter variance
    variance_estimator = (self.ema.average(second_moment) -
                          tf.square(self.ema.average(first_moment)))
    self.grad_variance = tf.reduce_mean(variance_estimator)

def classify(self, _buffer, fixed):
    '''
    Classify the sign from the input and display a message
    '''
    result = None
    # Classify fixed signs
    if fixed and self.nbFixed:
        result = self.nbFixed.probabilities(
            utils.vectorize(_buffer, fixed=True))
    # Classify gesture signs
    elif self.nbGesture:
        result = self.nbGesture.probabilities(
            utils.vectorize(_buffer, fixed=False))
    # No classifier is available for this sign type
    if result is None:
        return
    # Detection based on the euclidean-distance results
    if result[0][2] < 0.85:
        self.output.set(result[0][0])
    else:
        self.output.set("not recognized...")
        sleep(3)
        self.output.set("")

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    else:
        # logging.info('*' * 10 + ' Train')
        # train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    # args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    # word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, attention_fn, params, all_params = build_fn(args,
                                                                   embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)
    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(
        dev_examples, word_dict,
        sort_by_len=not args.test_only, concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y,
                           args.batch_size, args.concat)
    dev_acc, n_examples, prediction, all_examples = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())
    alpha = attention_func(attention_fn, all_dev)
    if args.test_only:
        return dev_acc, n_examples, prediction, all_examples, alpha

def test(model, data, act_data, emo_data):
    print('Test---------------------')
    model.eval()
    total, acc = 0, 0
    for i, (d, a, e) in tqdm(enumerate(zip(data, act_data, emo_data)),
                             total=len(data)):
        batch = vectorize(w2i, d, a, e)
        x = torch.stack([turn[0] for turn in batch], 0)
        act_labels = torch.stack([turn[1] for turn in batch], 0).squeeze(1)
        preds = model(x)
        acc += torch.sum(act_labels == torch.max(preds, 1)[1]).data[0]
        total += x.size(0)
        if i % 500 == 0:
            print('Acc {:.2f}%'.format(100 * acc / total))
    print('Final Acc {:.2f}%'.format(100 * acc / total))

def train(model, data, act_data, emo_data, start_epoch=0, n_epochs=20):
    print('Train---------------------')
    model.train()
    loss_fn = F.nll_loss
    for epoch in range(start_epoch, n_epochs):
        print('Epoch', epoch)
        indices = [i for i in range(len(data))]
        random.shuffle(indices)
        total, acc = 0, 0
        # for i, (d, a, e) in tqdm(enumerate(zip(data, act_data, emo_data)), total=len(data)):
        for i, idx in tqdm(enumerate(indices), total=len(indices)):
            d, a, e = data[idx], act_data[idx], emo_data[idx]
            batch = vectorize(w2i, d, a, e)
            x = torch.stack([turn[0] for turn in batch], 0)
            act_labels = torch.stack([turn[1] for turn in batch],
                                     0).squeeze(1)
            preds = model(x)
            acc += torch.sum(act_labels == torch.max(preds, 1)[1]).data[0]
            total += x.size(0)
            loss = loss_fn(preds, act_labels)
            # print(loss.data[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 500 == 0:
                print('Epoch {}, Acc {:.2f}%, loss {:.3f}'.format(
                    epoch, 100 * acc / total, loss.data[0]))

        filename = 'ckpts/SimpleLSTM-Epoch-{}.model'.format(epoch)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            filename=filename)

dropout_keep_prob = graph.get_operation_by_name(
    "dropout_keep_prob").outputs[0]
is_training = graph.get_operation_by_name("is_training").outputs[0]

# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]

# Generate batches for one epoch
all_labels = []
all_preds = []
for batch in test.bacth_iter(FLAGS.batch_size, desc="Testing",
                             shuffle=False):
    labels, docs = zip(*batch)
    padded_docs, _, _, _ = vectorize(docs)
    feed_dict = {
        inputs: padded_docs,
        # inputs_mask: padded_docs_mask,
        is_training: False,
        dropout_keep_prob: 1.0
    }
    preds = sess.run(predictions, feed_dict)
    all_labels = np.concatenate([all_labels, labels])
    all_preds = np.concatenate([all_preds, preds])

# Print accuracy
if all_labels is not None:
    correct_preds = float(sum(all_preds == all_labels))
    print("Total number of test examples: {}".format(len(all_labels)))
    print("Accuracy: {:g}".format(correct_preds / float(len(all_labels))))

def train():
    word_dict = load_vocab(FLAGS.vocab_data)
    glove = load_glove("../glove.6B.{}d.txt".format(FLAGS.embedding_size),
                       FLAGS.embedding_size, word_dict)
    train = Dataset(filepath=FLAGS.train_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)
    valid = Dataset(filepath=FLAGS.valid_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)

    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            rcnn = TextRCNN(vocab_size=len(word_dict),
                            embedding_size=FLAGS.embedding_size,
                            sequence_length=FLAGS.sequence_length,
                            num_class=FLAGS.num_class,
                            cell_type=FLAGS.cell_type,
                            hidden_size=FLAGS.hidden_size,
                            pretrained_embeddings=glove,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.compat.v1.Variable(0, name="global_step",
                                                trainable=False)
            train_op = tf.compat.v1.train.AdamOptimizer(
                FLAGS.learning_rate).minimize(rcnn.loss,
                                              global_step=global_step)
            acc, acc_op = tf.compat.v1.metrics.accuracy(
                labels=rcnn.labels, predictions=rcnn.predictions,
                name="metrics/acc")
            metrics_vars = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.LOCAL_VARIABLES, scope="metrics")
            metrics_init_op = tf.compat.v1.variables_initializer(
                var_list=metrics_vars)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.compat.v1.summary.scalar("loss", rcnn.loss)
            acc_summary = tf.compat.v1.summary.scalar("accuracy",
                                                      rcnn.accuracy)

            # Train summaries
            train_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.compat.v1.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Valid summaries
            valid_step = 0
            valid_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            valid_summary_dir = os.path.join(out_dir, "summaries", "valid")
            valid_summary_writer = tf.compat.v1.summary.FileWriter(
                valid_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory
            # already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.compat.v1.train.Saver(
                tf.compat.v1.global_variables(),
                max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            best_valid_acc = 0.0
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.local_variables_initializer())

            # training and validating loop
            for epoch in range(FLAGS.num_epoch):
                print('-' * 100)
                print('\n{}> epoch: {}\n'.format(
                    datetime.datetime.now().isoformat(), epoch))
                sess.run(metrics_init_op)

                # Training process
                for batch in train.bacth_iter(FLAGS.batch_size,
                                              desc="Training",
                                              shuffle=True):
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs,
                                                  FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy, _ = sess.run([
                        train_op, global_step, train_summary_op,
                        rcnn.loss, rcnn.accuracy, acc_op
                    ], feed_dict)
                    train_summary_writer.add_summary(summaries, step)
                print("\ntraining accuracy = {:.2f}\n".format(
                    sess.run(acc) * 100))

                sess.run(metrics_init_op)

                # Validating process
                for batch in valid.bacth_iter(FLAGS.batch_size,
                                              desc="Validating",
                                              shuffle=False):
                    valid_step += 1
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs,
                                                  FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, _ = sess.run(
                        [valid_summary_op, rcnn.loss, rcnn.accuracy,
                         acc_op], feed_dict)
                    valid_summary_writer.add_summary(
                        summaries, global_step=valid_step)
                valid_acc = sess.run(acc) * 100
                print("\nvalidating accuracy = {:.2f}\n".format(valid_acc))

                # model checkpoint
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    print("current best validating accuracy = "
                          "{:.2f}\n".format(best_valid_acc))
                    path = saver.save(sess, checkpoint_prefix)
                    print("saved model checkpoint to {}\n".format(path))

            print("{} optimization finished!\n".format(
                datetime.datetime.now()))
            print("best validating accuracy = {:.2f}\n".format(
                best_valid_acc))

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if not args.test_only:
        logging.info('*' * 10 + ' All')
        all_examples = utils.load_data(args.all_file, 100,
                                       relabeling=args.relabeling)
        dev_ratio = args.dev_ratio
        sample_index = np.arange(len(all_examples[0]))
        random.seed(1000)
        dev_index = random.sample(sample_index,
                                  int(dev_ratio * len(sample_index)))
        train_index = np.setdiff1d(sample_index, dev_index)
        dev_examples = tuple_part(all_examples, dev_index)
        train_examples = tuple_part(all_examples, train_index)

        # Feature preprocessing
        train_fea_flat_np = FeaExtract(train_examples[-1])
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        train_fea_flat_np2, dev_fea_flat_np2 = Prepocessing_func(
            train_fea_flat_np, dev_fea_flat_np,
            varian_ratio_tol=args.pca_ratio)
        train_fea_merge = FeaMerge(train_fea_flat_np2, train_examples[-1])
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        train_examples = train_examples[:-1] + (train_fea_merge, )
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )
        args.num_train = len(train_examples[0])
    else:
        # logging.info('*' * 10 + ' Train')
        # train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        dev_fea_flat_np2 = PrepocessingApply_func(dev_fea_flat_np)
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )

    args.num_dev = len(dev_examples[0])
    args.mea_num = dev_examples[4][0].shape[-1]

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)
    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y, dev_x4 = utils.vectorize(
        dev_examples, word_dict,
        sort_by_len=not args.test_only, concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, dev_x4,
                           args.batch_size, args.concat)
    dev_acc, prediction = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())
    best_dev_acc = dev_acc
    best_train_acc = 0
    if args.test_only:
        return dev_acc, best_train_acc
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y, train_x4 = utils.vectorize(
        train_examples, word_dict, concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_x3, train_y,
                             train_x4, args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3,
                  mb_y, mb_x4, mb_mask4) in enumerate(all_train):
            train_loss = train_fn(mb_x1, mb_mask1, mb_x3, mb_mask3,
                                  mb_y, mb_x4)
            # if idx % 100 == 0:
            # if epoch % 100 == 0:
            #     logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            #     logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
            #                  (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                print([x.get_value() for x in params])
                print([x.get_value() for x in all_params])
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k] for k in samples],
                    [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples for o in range(4)],
                    [train_y[k] for k in samples],
                    [train_x4[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy (sample): %.2f %%' % acc)
                train_acc, pred = eval_acc(test_fn, all_train)
                logging.info('Train accuracy (all): %.2f %%' % train_acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, '
                        'acc = %.2f %%' % (epoch, n_updates, best_dev_acc))
                    best_train_acc = acc
                    logging.info(
                        'Best train accuracy: epoch = %d, n_updates = %d, '
                        'acc = %.2f %%' % (epoch, n_updates, best_train_acc))
                    utils.save_params(args.model_file, all_params,
                                      epoch=epoch, n_updates=n_updates)
    return best_dev_acc, best_train_acc

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    # word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)  # EMBEDDING
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)
    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(
        dev_examples, word_dict,
        sort_by_len=not args.test_only, concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y,
                           args.batch_size, args.concat)
    dev_acc, pred = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y = utils.vectorize(
        train_examples, word_dict, concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_x3, train_y,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3,
                  mb_y) in enumerate(all_train):
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2,
                                  mb_x3, mb_mask3, mb_y)
            if idx % 100 == 0:
                logging.info('#Examples = %d, max_len = %d' %
                             (len(mb_x1), mb_x1.shape[1]))
                logging.info(
                    'Epoch = %d, iter = %d (max = %d), loss = %.2f, '
                    'elapsed time = %.2f (s)' %
                    (epoch, idx, len(all_train), train_loss,
                     time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k] for k in samples],
                    [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples for o in range(4)],
                    [train_y[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, '
                        'acc = %.2f %%' % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, all_params,
                                      epoch=epoch, n_updates=n_updates)

def block_trunc_pad_zeroes(item, xlen=max_words_len):
    return np_pad_to_size(
        [vectorize(word, embeddings_model) for word in item[:xlen]],
        minsizes=(None, xlen, None))

def main():
    start = timer()
    if (os.path.isfile("data/tweets" + str(max_example) + ".npy")
            and os.path.isfile("data/emojis" + str(max_example) + ".npy")):
        tweets = np.load("data/tweets" + str(max_example) + ".npy").tolist()
        emojis = np.load("data/emojis" + str(max_example) + ".npy").tolist()
    else:
        tweets, emojis = utils.load_data(path='data/final_train',
                                         max_example=max_example)
        np.save("data/tweets" + str(max_example) + ".npy", np.array(tweets))
        np.save("data/emojis" + str(max_example) + ".npy", np.array(emojis))

    if (os.path.isfile("data/dev_tweets" + str(max_dev_example) + ".npy")
            and os.path.isfile("data/dev_emojis" + str(max_dev_example)
                               + ".npy")):
        dev_tweets = np.load("data/dev_tweets" + str(max_dev_example)
                             + ".npy").tolist()
        dev_emojis = np.load("data/dev_emojis" + str(max_dev_example)
                             + ".npy").tolist()
    else:
        dev_tweets, dev_emojis = utils.load_data(max_example=max_dev_example)
        np.save("data/dev_tweets" + str(max_dev_example) + ".npy",
                np.array(dev_tweets))
        np.save("data/dev_emojis" + str(max_dev_example) + ".npy",
                np.array(dev_emojis))

    start1 = timer()
    print(start1 - start)
    word_dict = utils.build_dict(tweets)
    # embeddings = utils.generate_embeddings(word_dict, dim=300, pretrained_path='data/glove.6B.300d.txt')
    embeddings = utils.generate_embeddings(word_dict, dim=300,
                                           pretrained_path=None)
    end0 = timer()
    print(end0 - start1)
    x, y = utils.vectorize(tweets, emojis, word_dict)
    dev_x, dev_y = utils.vectorize(dev_tweets, dev_emojis, word_dict)
    end1 = timer()
    print(end1 - end0)
    batch_size, input_size, hidden_size, output_size, layers = \
        32, 300, 200, 20, 1
    all_train = utils.generate_batches(x, y, batch_size=batch_size)
    all_dev = utils.generate_batches(dev_x, dev_y, batch_size=batch_size)
    end2 = timer()
    print(end2 - end1)

    # set the parameters
    # batch_size, input_size, hidden_size, output_size, layers = 64, 50, 200, 20, 1
    vocabulary_size = len(embeddings)

    if run_GRU:
        print("running GRU...")
        # initialize the model
        model = GRU_Classifier(vocabulary_size, input_size, hidden_size,
                               output_size, layers, run_BD_GRU)
        model.word_embeddings.weight.data = torch.FloatTensor(
            embeddings.tolist())
        if torch.cuda.is_available():
            model.cuda()
            (model.word_embeddings.weight.data).cuda()
        loss_function = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            loss_function.cuda()
        optimizer = optim.Adam(model.parameters(), lr=global_learning_rate)
        epoch_num = 500
        it = 0
        best_dev_acc = 0
        best_f1 = 0

        # model training
        for epoch in range(epoch_num):
            np.random.shuffle(all_train)
            for idx, (mb_x, mb_y, mb_lengths) in enumerate(all_train):
                # sort the input in descending order according to sentence
                # length. This is required by nn.utils.rnn.pack_padded_sequence
                sorted_index = len_value_argsort(mb_lengths)
                mb_x = [mb_x[i] for i in sorted_index]
                mb_y = [mb_y[i] for i in sorted_index]
                mb_lengths = [mb_lengths[i] for i in sorted_index]
                print('#Examples = %d, max_seq_len = %d' %
                      (len(mb_x), len(mb_x[0])))
                mb_x = Variable(torch.from_numpy(
                    np.array(mb_x, dtype=np.int64)), requires_grad=False)
                if torch.cuda.is_available():
                    mb_x = mb_x.cuda()
                y_pred = model(mb_x.t(), mb_lengths)
                mb_y = Variable(torch.from_numpy(
                    np.array(mb_y, dtype=np.int64)), requires_grad=False)
                if torch.cuda.is_available():
                    mb_y = mb_y.cuda()
                loss = loss_function(y_pred, mb_y)
                # print('epoch ', epoch, 'batch ', idx, 'loss ', loss.data[0])
                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()
                it += 1

                if it % 100 == 0:
                    # every 100 updates, check dev accuracy
                    correct = 0
                    n_examples = 0
                    ground_truth = []
                    predicted = []
                    for idx, (d_x, d_y, d_lengths) in enumerate(all_dev):
                        ground_truth += d_y
                        n_examples += len(d_x)
                        sorted_index = len_value_argsort(d_lengths)
                        d_x = [d_x[i] for i in sorted_index]
                        d_y = [d_y[i] for i in sorted_index]
                        d_lengths = [d_lengths[i] for i in sorted_index]
                        d_x = Variable(torch.from_numpy(
                            np.array(d_x, dtype=np.int64)),
                            requires_grad=False)
                        if torch.cuda.is_available():
                            d_x = d_x.cuda()
                        # use pytorch way to calculate the correct count
                        d_y = Variable(torch.from_numpy(
                            np.array(d_y, dtype=np.int64)),
                            requires_grad=False)
                        if torch.cuda.is_available():
                            d_y = d_y.cuda()
                        y_pred = model(d_x.t(), d_lengths)
                        predicted += list(
                            torch.max(y_pred, 1)[1].view(d_y.size()).data)
                        correct += (torch.max(y_pred, 1)[1].view(
                            d_y.size()).data == d_y.data).sum()
                    dev_acc = correct / n_examples
                    f1 = f1_score(ground_truth, predicted, average='macro')
                    print("Dev Accuracy: %f, F1 Score: %f" % (dev_acc, f1))
                    if f1 > best_f1:
                        best_f1 = f1
                        print("Best F1 Score: %f" % best_f1)
                        gru_output = open('./out/gru_best', 'w')
                        gru_output.write(str(ground_truth) + '\n')
                        gru_output.write(str(predicted) + '\n')
                        gru_output.write(str(best_f1) + ' ' + str(dev_acc))
                        gru_output.close()
                    if dev_acc > best_dev_acc:
                        best_dev_acc = dev_acc
                        print("Best Dev Accuracy: %f" % best_dev_acc)

    if run_LSTM:
        print("Running LSTM...")
        model = LSTM_Classifier(vocabulary_size, input_size, hidden_size,
                                output_size, layers, run_BD_LSTM)
        model.word_embeddings.weight.data = torch.FloatTensor(
            embeddings.tolist())
        if torch.cuda.is_available():
            model.cuda()
            (model.word_embeddings.weight.data).cuda()
        loss_function = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            loss_function.cuda()
        optimizer = optim.Adam(model.parameters(), lr=global_learning_rate)
        it = 0
        best_dev_acc = 0
        best_f1 = 0
        epoch_num = 500

        # train LSTM
        for epoch in range(epoch_num):
            np.random.shuffle(all_train)
            for idx, (mb_x, mb_y, mb_lengths) in enumerate(all_train):
                sorted_index = len_value_argsort(mb_lengths)
                mb_x = [mb_x[i] for i in sorted_index]
                mb_y = [mb_y[i] for i in sorted_index]
                mb_lengths = [mb_lengths[i] for i in sorted_index]
                print('#Examples = %d, max_seq_len = %d' %
                      (len(mb_x), len(mb_x[0])))
                mb_x = Variable(torch.from_numpy(
                    np.array(mb_x, dtype=np.int64)), requires_grad=False)
                if torch.cuda.is_available():
                    mb_x = mb_x.cuda()
                y_pred = model(mb_x.t(), mb_lengths)
                mb_y = Variable(torch.from_numpy(
                    np.array(mb_y, dtype=np.int64)), requires_grad=False)
                if torch.cuda.is_available():
                    mb_y = mb_y.cuda()
                loss = loss_function(y_pred, mb_y)
                # print('epoch ', epoch, 'batch ', idx, 'loss ', loss.data[0])
                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()
                it += 1

                if it % 100 == 0:
                    # every 100 updates, check dev accuracy
                    correct = 0
                    n_examples = 0
                    ground_truth = []
                    predicted = []
                    for idx, (d_x, d_y, d_lengths) in enumerate(all_dev):
                        ground_truth += d_y
                        n_examples += len(d_x)
                        sorted_index = len_value_argsort(d_lengths)
                        d_x = [d_x[i] for i in sorted_index]
                        d_y = [d_y[i] for i in sorted_index]
                        d_lengths = [d_lengths[i] for i in sorted_index]
                        d_x = Variable(torch.from_numpy(
                            np.array(d_x, dtype=np.int64)),
                            requires_grad=False)
                        if torch.cuda.is_available():
                            d_x = d_x.cuda()
                        d_y = Variable(torch.from_numpy(
                            np.array(d_y, dtype=np.int64)),
                            requires_grad=False)
                        if torch.cuda.is_available():
                            d_y = d_y.cuda()
                        y_pred = model(d_x.t(), d_lengths)
                        predicted += list(
                            torch.max(y_pred, 1)[1].view(d_y.size()).data)
                        correct += (torch.max(y_pred, 1)[1].view(
                            d_y.size()).data == d_y.data).sum()
                    dev_acc = correct / n_examples
                    f1 = f1_score(ground_truth, predicted, average='macro')
                    print("Dev Accuracy: %f, F1 Score: %f" % (dev_acc, f1))
                    if f1 > best_f1:
                        best_f1 = f1
                        print("Best F1 Score: %f" % best_f1)
                        lstm_output = open('./out/lstm_best', 'w')
                        lstm_output.write(str(ground_truth) + '\n')
                        lstm_output.write(str(predicted) + '\n')
                        lstm_output.write(str(best_f1) + ' ' + str(dev_acc))
                        lstm_output.close()
                    if dev_acc > best_dev_acc:
                        best_dev_acc = dev_acc
                        print("Best Dev Accuracy: %f" % best_dev_acc)

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100,
                                       relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')]
            + train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')
    if args.prepare_model:
        return train_fn, test_fn, params

    logging.info('-' * 50)
    logging.info(args)
    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples,
                                                   word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(
        train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_l, train_y,
                             args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) \
                in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' %
                         (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2,
                                  mb_l, mb_y)
            logging.info(
                'Epoch = %d, iter = %d (max = %d), loss = %.2f, '
                'elapsed time = %.2f (s)' %
                (epoch, idx, len(all_train), train_loss,
                 time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' %
                             eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, '
                        'acc = %.2f %%' % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params,
                                      epoch=epoch, n_updates=n_updates)

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100,
                                       relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')]
            + train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)
    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples,
                                                   word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(
        train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_l, train_y,
                             args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) \
                in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' %
                         (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2,
                                  mb_l, mb_y)
            logging.info(
                'Epoch = %d, iter = %d (max = %d), loss = %.2f, '
                'elapsed time = %.2f (s)' %
                (epoch, idx, len(all_train), train_loss,
                 time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' %
                             eval_acc(test_fn, sample_train))
                # Evaluate and store dev accuracy before comparing, so the
                # best-model check below uses the current value
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, '
                        'acc = %.2f %%' % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params,
                                      epoch=epoch, n_updates=n_updates)
