Example #1
  def get_dynamic_rebar_gradient(self):
    """Get the dynamic rebar gradient (t, eta optimized)."""
    tiled_pre_temperature = tf.tile([self.pre_temperature_variable],
                                [self.batch_size])
    temperature = tf.exp(tiled_pre_temperature)

    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
      gumbel_cv, extra  = self._create_gumbel_control_variate_quadratic(logQHard, temperature=temperature)
    else:
      gumbel_cv, extra  = self._create_gumbel_control_variate(logQHard, temperature=temperature)

    f_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(-nvil_gradient))

    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    g = U.vectorize(model_grads, set_none_to_zero=True)
    self.maintain_ema_ops.append(self.ema.apply([g]))
    gbar = 0  #tf.stop_gradient(self.ema.average(g))
    variance_objective = tf.reduce_mean(tf.square(g - gbar))

    reinf_g_t = 0
    if self.hparams.quadratic:
      for layer in xrange(self.hparams.n_layer):
        gumbel_learning_signal, _ = extra[layer]
        df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
        reinf_g_t_i, _ = self.multiply_by_eta_per_layer(
            self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * logQHard[layer])),
            eta)
        reinf_g_t += U.vectorize(reinf_g_t_i, set_none_to_zero=True)

      reparam = tf.add_n([reparam_i for _, reparam_i in extra])
    else:
      gumbel_learning_signal, reparam = extra
      df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
      reinf_g_t, _ = self.multiply_by_eta_per_layer(
          self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * tf.add_n(logQHard))),
          eta)
      reinf_g_t = U.vectorize(reinf_g_t, set_none_to_zero=True)

    reparam_g, _ = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(reparam)),
        eta)
    reparam_g = U.vectorize(reparam_g, set_none_to_zero=True)
    reparam_g_t = tf.gradients(tf.reduce_mean(2*tf.stop_gradient(g - gbar)*reparam_g), self.pre_temperature_variable)[0]

    variance_objective_grad = tf.reduce_mean(2*(g - gbar)*reinf_g_t) + reparam_g_t

    debug = { 'ELBO': hardELBO,
             'etas': eta_statistics,
             'variance_objective': variance_objective,
             }
    return total_grads, debug, variance_objective, variance_objective_grad
Example #2
def make_data(data, wordvec_file, ans_file=None, base_dir='./rawdata'):
    if ans_file:
        ans_data = pd.read_csv(base_dir + ans_file, sep='\t').set_index('q_id')
        answer_map = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}
        ans_data['answer'] = ans_data['answer'].apply(lambda x: answer_map[x])

    wordvec = pd.read_csv(base_dir + wordvec_file,
                          sep=' ',
                          header=None,
                          index_col=0)

    vectors = {}
    for qid, data_dict in data.items():
        vectors[qid] = {}

        ques_vec = vectorize(data_dict['question'], wordvec)
        capt_vec = vectorize(data_dict['caption'], wordvec)
        context = np.concatenate((ques_vec, capt_vec))

        X = []
        for choice in data_dict['choices']:
            choi_vec = vectorize(choice, wordvec)
            x = np.concatenate((context, choi_vec))
            X.append(x)

        vectors[qid]['X'] = np.array(X)

        if ans_file:
            ans = ans_data.loc[qid]['answer']
            y = [([0, 1] if i == ans else [1, 0]) for i in range(5)]
            vectors[qid]['y'] = np.array(y)

    return vectors
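Note: make_data above relies on a vectorize(text, wordvec) helper that maps a question, caption, or choice to a fixed-length embedding. A minimal sketch of such a helper, assuming it simply averages whatever word vectors are found in the pandas table loaded above (the project's actual implementation may differ):

import numpy as np

def vectorize(text, wordvec):
    # Assumed helper (sketch only): average the embeddings of the tokens
    # that appear in the word-vector table; fall back to a zero vector.
    tokens = text.split() if isinstance(text, str) else list(text)
    tokens = [t for t in tokens if t in wordvec.index]
    if not tokens:
        return np.zeros(wordvec.shape[1])
    return wordvec.loc[tokens].to_numpy().mean(axis=0)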
Example #4
File: main.py Project: zhyq/acnn
def init():
    path = config.data_path
    config.embedding_file = os.path.join(path, config.embedding_file)
    config.embedding_vocab = os.path.join(path, config.embedding_vocab)
    config.train_file = os.path.join(path, config.train_file)
    config.test_file = os.path.join(path, config.test_file)

    # Config log
    if config.log_file is None:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')
    else:
        if not os.path.exists(config.save_path):
            os.makedirs(config.save_path)
        logging.basicConfig(filename=config.log_file,
                            filemode='a',
                            level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')
    # Load data
    # data = (sentences, relations, e1_pos, e2_pos)
    train_data = utils.load_data(config.train_file)
    test_data = utils.load_data(config.test_file)

    logging.info('train data: %d' % len(train_data[0]))
    logging.info('test data: %d' % len(test_data[0]))

    # Build vocab
    word_dict = utils.build_dict(train_data[0] + test_data[0])
    logging.info('total words: %d' % len(word_dict))

    embeddings = utils.load_embedding(config, word_dict)

    # Log parameters
    flags = config.__dict__['__flags']
    flag_str = "\n"
    for k in flags:
        flag_str += "\t%s:\t%s\n" % (k, flags[k])
    logging.info(flag_str)

    # vectorize data
    # vec = (sents_vec, relations, e1_vec, e2_vec, dist1, dist2)
    max_len_train = len(max(train_data[0], key=lambda x: len(x)))
    max_len_test = len(max(test_data[0], key=lambda x: len(x)))
    max_len = max(max_len_train, max_len_test)
    config.max_len = max_len

    train_vec = utils.vectorize(train_data, word_dict, max_len)
    test_vec = utils.vectorize(test_data, word_dict, max_len)

    return embeddings, train_vec, test_vec
Example #5
    def stopRecording(self, event):
        '''
        Stop recording frames and convert the frame buffer into a vector
        of the appropriate length, depending on the sign type
        ('gesture' or 'fixed').
        '''
        if self.isRecording:
            value = self.currentSign
            # Stop the recording thread
            self.isRecording = False

            if self.currentSign.type == 'Fixed' and len(self.vectorBuffer):
                self.currentSign.samples.append(
                    utils.vectorize(self.vectorBuffer, fixed=True))

            # Make sure we have at least 10 frames to create the gesture
            elif len(self.vectorBuffer) >= 10:
                self.currentSign.samples.append((self.vectorBuffer))

            else:
                tkMessageBox.showwarning(
                    '', 'Too few frames recorded. Try again')
                return

            if len(self.currentSign.samples) >= 2:
                if not utils.validateFeatures(self.currentSign.samples):
                    tkMessageBox.showwarning('', \
                     'Zero-values recorded from sensor. Delete samples for current sign and try again')

            pickle.dump(self.dataset, open('dataset.p', 'wb'))
            self.updateSamplesList()
Example #6
def eval_acc(data):
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(data, word_dict,
                                                   entity_dict)
    all_dev = qa_model.gen_examples(dev_x1, dev_x2, dev_l, dev_y,
                                    args.batch_size)
    dev_acc = qa_model.eval_acc(test_fn, all_dev)
    return dev_acc
Example #7
    def compute_gradient_moments(self, grads_and_vars):
        first_moment = U.vectorize(grads_and_vars, set_none_to_zero=True)
        second_moment = tf.square(first_moment)
        self.maintain_ema_ops.append(
            self.ema.apply([first_moment, second_moment]))

        return self.ema.average(first_moment), self.ema.average(second_moment)
Example #8
    def preprocess(self, train, test):
        if self.model_type == "ML":
            if self.vectorizer == 'tf-igm':
                X, Y, test_X, test_Y, num_words = tf_igm_vectorizer(
                    train, test, stopwords_file=self.stopwords_file)
            else:
                X, Y, test_X, test_Y, num_words = vectorize(
                    train,
                    test,
                    vectorizer=self.vectorizer,
                    stopwords_file=self.stopwords_file)
            Y = np.argmax(Y, axis=1)
            test_Y = np.argmax(test_Y, axis=1)
        else:
            if self.vectorizer == 'tf-igm':
                X, Y, test_X, test_Y, num_words = tf_igm_vectorizer(
                    train, test, stopwords_file=self.stopwords_file)
            else:
                X, Y, test_X, test_Y, num_words = tokenize(
                    train,
                    test,
                    stopwords_file=self.stopwords_file,
                    maxlen=self.maxlen)

        return X, Y, test_X, test_Y, num_words
Example #9
    def get_rebar_gradient(self):
        """Get the rebar gradient."""
        hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
        if self.hparams.quadratic:
            gumbel_cv, _ = self._create_gumbel_control_variate_quadratic(
                logQHard)
        else:
            gumbel_cv, _ = self._create_gumbel_control_variate(logQHard)

        f_grads = self.optimizer_class.compute_gradients(
            tf.reduce_mean(-nvil_gradient))

        eta = {}
        h_grads, eta_statistics = self.multiply_by_eta_per_layer(
            self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
            eta)

        model_grads = U.add_grads_and_vars(f_grads, h_grads)
        total_grads = model_grads

        # Construct the variance objective
        variance_objective = tf.reduce_mean(
            tf.square(U.vectorize(model_grads, set_none_to_zero=True)))

        debug = {
            'ELBO': hardELBO,
            'etas': eta_statistics,
            'variance_objective': variance_objective,
        }
        return total_grads, debug, variance_objective
Example #10
def test(model,data,w2i,batch_size,task_id):
    print "start test---"
    model.eval()
    correct = 0
    count = 0
    for i in range(0,len(data)-batch_size,batch_size):
        batch_data = data[i:i+batch_size]
        story = [d[0] for d in batch_data]
        q = [d[1] for d in batch_data]
        a = [d[2][0] for d in batch_data]

        story_len = min(max_story_len,max([len(s) for s in story]))
        s_sent_len = max([len(sent) for s in story for sent in s])
        q_sent_len = max([len(sent) for sent in q])

        vec_data = vectorize(batch_data,w2i,story_len,s_sent_len,q_sent_len)
        story = [d[0] for d in vec_data]
        q = [d[1] for d in vec_data]
        a = [d[2][0] for d in vec_data]

        story = to_var(torch.LongTensor(story))
        q = to_var(torch.LongTensor(q))
        a = to_var(torch.LongTensor(a))
        pred = model(story,q)
        pred_idx = pred.max(1)[1]
        correct += torch.sum(pred_idx == a).item()
        count+=batch_size
    acc = (float(correct) / count) * 100

    print('correct:',correct)
    print('Task {} Test Acc: {} % - '.format(task_id,acc),correct,'/',count)
    return acc
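to_var is used above but not defined in this snippet; a minimal sketch of the usual helper in these older PyTorch memory-network scripts (an assumption, not the project's exact code):

import torch
from torch.autograd import Variable

def to_var(tensor):
    # Assumed helper (sketch): move the tensor to the GPU when one is
    # available and wrap it in a Variable, as pre-0.4 PyTorch code did.
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor)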
Example #11
  def get_rebar_gradient(self):
    """Get the rebar gradient."""
    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
      gumbel_cv, _ = self._create_gumbel_control_variate_quadratic(logQHard)
    else:
      gumbel_cv, _ = self._create_gumbel_control_variate(logQHard)

    f_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(-nvil_gradient))

    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    variance_objective = tf.reduce_mean(tf.square(U.vectorize(model_grads, set_none_to_zero=True)))

    debug = { 'ELBO': hardELBO,
             'etas': eta_statistics,
             'variance_objective': variance_objective,
             }
    return total_grads, debug, variance_objective
Example #12
	def stopRecording(self, event):
		'''
		Stop ongoing recording of frames, and based on sign type
		(fixed or gesture) convert buffer of frames to a vector with
		the corresponding length
		'''
		if self.isRecording:
			value = self.currentSign
			 # Kill recording thread
			self.isRecording = False

			# If sign recorded was fixed/static and has at least one frame
			if self.currentSign.type == 'Fixed' and len(self.vectorBuffer):
				self.currentSign.samples.append(utils.vectorize(self.vectorBuffer, fixed=True))

			# Make sure we have enough frames to create 10 keyframes for
			# gestures signs
			elif len(self.vectorBuffer) >= 10:
				self.currentSign.samples.append(utils.vectorize(self.vectorBuffer, fixed=False))

			# Not enough frames were recorded, return before making changes
			else:
				tkMessageBox.showwarning('', 'Too few frames recorded. Try again')
				return

			# Check that features across all samples are non-zero
			if len(self.currentSign.samples) >= 2:
				if not utils.validateFeatures(self.currentSign.samples):
					tkMessageBox.showwarning('', \
						'Zero-values recorded from sensor. Delete samples for current sign and try again')

			# Just to be safe, update dataset file despite not exiting program
			pickle.dump(self.dataset, open('dataset.p', 'wb'))
			self.updateSamplesList()
Example #13
def use_reinforce(env, n_episode, n_step, start_alpha, info_times=20):
    print('Start REINFORCE with', n_episode, n_step, start_alpha, info_times)

    theta1, theta2 = [0. for i in range(env.observation_dim)
                      ], [0. for i in range(env.observation_dim)]
    alpha = start_alpha
    average_score = 0

    for i_episode in range(n_episode):
        observation = vectorize(env.reset())
        score = 0
        actions = []
        states = []
        rewards = []

        for i_step in range(n_step):
            prev_observation = observation.copy()
            action = apply_policy(theta1, theta2, observation)
            if i_episode == n_episode - 1:
                env.render()
            observation, reward, done, info = env.step(action)
            observation = vectorize(observation)
            states.append(prev_observation)
            actions.append(action)
            rewards.append(reward)

            score += reward
            # env.render()
            if done:
                break

        alpha = update_alpha(start_alpha, i_episode)
        average_score += score

        if i_episode and i_episode % (n_episode / info_times) == 0:
            average_score /= (n_episode / info_times)
            print('Progression : {0} Average Score : {1}, Alpha : {2}'.format(
                i_episode / n_episode, average_score, alpha))

        # print('Episode ended in {0} steps, score is {1}'.format(survival_time, score))

        # print(theta1)
        # alpha = update_alpha(score)
        theta1 = update_theta(theta1, alpha, states, actions, rewards)
        theta2 = update_theta(theta2, alpha, states, actions, rewards)
Example #14
    def test_vectorize(self):
        with self.assertRaises(AssertionError):
            vectorize([])

        array = vectorize(np.array([]))
        self.assertEqual(array.shape, (0, 1))

        array = vectorize(np.zeros((2, 1)))
        self.assertEqual(array.shape, (2, 1))

        array = vectorize(np.zeros((1, 3)))
        self.assertEqual(array.shape, (3, 1))

        array = vectorize(np.zeros((3, 4, 2, 5)))
        self.assertEqual(array.shape, (120, 1))

        image = np.array([[[0.67826139, 0.29380381], [0.90714982, 0.52835647],
                           [0.4215251, 0.45017551]],
                          [[0.92814219, 0.96677647], [0.85304703, 0.52351845],
                           [0.19981397, 0.27417313]],
                          [[0.60659855, 0.00533165], [0.10820313, 0.49978937],
                           [0.34144279, 0.94630077]]])

        expected = np.array([[0.67826139], [0.29380381], [0.90714982],
                             [0.52835647], [0.4215251], [0.45017551],
                             [0.92814219], [0.96677647], [0.85304703],
                             [0.52351845], [0.19981397], [0.27417313],
                             [0.60659855], [0.00533165], [0.10820313],
                             [0.49978937], [0.34144279], [0.94630077]])

        self.assertTrue(np.allclose(vectorize(image), expected))
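A vectorize implementation consistent with every assertion in this test would flatten any ndarray into a column vector in C order; a minimal sketch (an assumption, the tested project may differ in details):

import numpy as np

def vectorize(array):
    # Sketch matching the test above: reject non-arrays, then reshape
    # any ndarray into an (n, 1) column vector in row-major order.
    assert isinstance(array, np.ndarray)
    return array.reshape(-1, 1)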
Example #15
    def simulate(self, state_node, copy_env, depth):
        if state_node.done or depth == self.max_depth or (state_node.look == 'unknown' and state_node.inv == 'unknown'):
            return 0
        
        best_action_node = self.greedy_action_node(state_node, self.exploration_constant, self.bonus_constant)

        rollout_next = False

        ob, reward, done, info = copy_env.step(best_action_node.action, valid_out=False)
        next_state_text = ob + info['look'] + info['inv']

        if '*** You have won ***' in next_state_text or '*** You have died ***' in next_state_text:
            score = int(next_state_text.split('you scored ')[1].split(' out of')[0])
            reward = score - state_node.score
            info['score'] = score

        self.write_buffer(state_node, best_action_node, ob, reward, done, info)

        if next_state_text in best_action_node.children_text:
            index = best_action_node.children_text.index(next_state_text)
            next_state_node = best_action_node.children[index]

            if next_state_node.N == 0:
                rollout_next = True
            next_state_node.N += 1

        else:
            if next_state_text in self.valid_action_dict.keys():
                info['valid'] = self.valid_action_dict[next_state_text]
            else:
                info['valid'] = copy_env.get_valid(ob)
                self.valid_action_dict[next_state_text] = info['valid']
            next_state_node = self.build_state(ob, info, reward, prev_action=best_action_node.action)
            best_action_node.children.append(next_state_node)
            best_action_node.children_text.append(next_state_node.state)
            rollout_next = True

        if rollout_next:
            R = reward + self.discount_factor * self.rollout(next_state_node, copy_env, depth+1)
        else:
            R = reward + self.discount_factor * self.simulate(next_state_node, copy_env, depth+1)

        state_node.N += 1
        best_action_node.N += 1

        if self.uct_type == 'MC-LAVE':
            if not best_action_node.action in self.action_embedding.keys():
                embed_vector = utils.vectorize(best_action_node.action)
                self.action_embedding[best_action_node.action] = embed_vector

            action_value = self.q_network.get_q_value(ob, info['look'], info['inv'], state_node.prev_action, info['score'], best_action_node.action)
            self.action_values[best_action_node.action].add(action_value)

        best_action_node.Rs.append(R)
        best_action_node.Q = np.sum(np.array(best_action_node.Rs) * utils.softmax(best_action_node.Rs, T=10))

        return R
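The final Q update above weights each stored return by a softmax over the returns. A small standalone check of that backup, assuming utils.softmax(x, T) is a temperature-scaled softmax (the numbers are made up for illustration):

import numpy as np

def softmax(x, T=1.0):
    # Assumed shape of utils.softmax: temperature-scaled softmax.
    z = np.array(x, dtype=float) / T
    e = np.exp(z - z.max())
    return e / e.sum()

Rs = [1.0, 0.0, 2.0]                          # illustrative returns
Q = np.sum(np.array(Rs) * softmax(Rs, T=10))  # softmax-weighted average
print(round(Q, 3))                            # -> 1.067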
Example #16
def test(args, word_dict, entity_dict, train_fn, test_fn, params):
    dev_examples = utils.load_data(args.dev_file,
                                   args.max_dev,
                                   relabeling=args.relabeling)
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict,
                                                   entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    return dev_acc
Example #17
def prepare_quries_answers(args):
  chat_data = utils.load_data(args.data_dir)
  chat_data = utils.filter_sentences(chat_data, args.whitelist)
  index2word, word2index = utils.build_vocab(chat_data, max_words=args.max_words)
  Limits.q_max_len, Limits.a_max_len, Limits.q_min_len, Limits.a_min_len = args.q_max_len, \
      args.a_max_len, args.q_min_len, args.a_min_len
  queries, answers = utils.split_data(chat_data, Limits)
  queries, answers = utils.vectorize(queries,  answers, word2index, sort_by_len=True)

  return queries, answers, index2word, word2index
Example #18
    def greedy_action_node(self, state_node, exploration_constant, bonus_constant):
        best_value = -np.inf
        best_children = []

        for i in range(len(state_node.children)):
            child = state_node.children[i]
            child_prob = state_node.children_probs[i]

            if exploration_constant == 0:
                ucb_value = child.Q
            elif self.uct_type == 'UCT':
                ucb_value = child.Q + exploration_constant * np.sqrt(np.log(state_node.N + 1) / (child.N + 1))
            elif self.uct_type == 'PUCT':
                ucb_value = child.Q + exploration_constant * child_prob * np.sqrt(state_node.N + 1) / (child.N + 1)
            elif self.uct_type == 'MC-LAVE':
                if child.action in self.action_embedding.keys():
                    action_e = self.action_embedding[child.action]
                else:
                    action_e = utils.vectorize(child.action)
                    self.action_embedding[child.action] = action_e

                actions = list(self.action_values.keys())
                if child.action in actions:
                    actions.pop(actions.index(child.action))

                actions_e = []
                for a in actions:
                    actions_e.append(self.action_embedding[a])

                near_act, near_idx = utils.find_near_actions(action_e, actions, np.array(actions_e), threshold=0.8)
                if len(near_idx) == 0:
                    child.Q_hat = 0
                else:
                    near_Qs = set()
                    for a in near_act:
                        near_Qs.add(np.mean(list(self.action_values[a])))
                    near_Qs = list(near_Qs)
                    child.Q_hat = utils.softmax_value(near_Qs)

                ucb_value = child.Q \
                            + exploration_constant * np.sqrt(state_node.N + 1) / (child.N + 1) * child_prob \
                            + bonus_constant * np.sqrt(state_node.N + 1) / (child.N + 1) * child.Q_hat

            else:
                raise NotImplementedError

            if ucb_value == best_value:
                best_children.append(child)
            elif ucb_value > best_value:
                best_value = ucb_value
                best_children = [child]

        return np.random.choice(best_children)
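For the plain UCT branch above, the score reduces to Q plus an exploration bonus of exploration_constant * sqrt(ln(N_parent + 1) / (N_child + 1)). A tiny standalone check with made-up numbers:

import numpy as np

# Illustrative values only: a child with Q = 0.5 visited 3 times under a
# parent visited 10 times, with exploration constant 1.4.
Q, c, parent_N, child_N = 0.5, 1.4, 10, 3
ucb_value = Q + c * np.sqrt(np.log(parent_N + 1) / (child_N + 1))
print(round(ucb_value, 3))  # -> 1.584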
Example #19
def train(model, train_data, test_data, optimizer, loss_fn, w2i, task_id,
          batch_size, n_epoch):
    for epoch in range(n_epoch):
        model.train()
        # print('epoch', epoch)
        correct = 0
        count = 0
        random.shuffle(train_data)
        for i in range(0, len(train_data) - batch_size, batch_size):
            batch_data = train_data[i:i + batch_size]
            story = [d[0] for d in batch_data]
            story_len = min(max_story_len, max([len(s) for s in story]))
            story_sent_len = max([len(sent) for s in story for sent in s])
            question = [d[1] for d in batch_data]
            question_sent_len = max([len(sent) for sent in question])

            vec_data = vectorize(batch_data, w2i, story_len, story_sent_len,
                                 question_sent_len)
            story = [d[0] for d in vec_data]
            question = [d[1] for d in vec_data]
            answer = [d[2][0] for d in vec_data]

            story = to_var(torch.LongTensor(story))
            question = to_var(torch.LongTensor(question))
            answer = to_var(torch.LongTensor(answer))

            pred = model(story, question)

            loss = loss_fn(pred, answer)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # reset padding index weight
            for name, param in model.named_parameters():
                if param.grad is not None:
                    if 'A.' in name:
                        param.data[0] = 0

            pred_idx = pred.max(1)[1]
            correct += torch.sum(pred_idx == answer).item()
            count += batch_size

            # for p in model.parameters():
            #     torch.nn.utils.clip_grad_norm(p, 40.0)

        if epoch % 20 == 0:
            print('=======Epoch {}======='.format(epoch))
            print('Training Acc: {:.2f}% - '.format(correct / count * 100),
                  correct, '/', count)
            test(model, test_data, w2i, batch_size, task_id)
Example #20
    def classify(self, _buffer, fixed):
        '''
		Classify sign based on input and display message on result
		'''
        result = None

        # Classify fixed sign
        if fixed and self.nbFixed:
            result = self.nbFixed.probabilities(
                utils.vectorize(_buffer, fixed=True))

        # Classify gesture sign
        elif self.nbGesture:
            result = self.nbGesture.probabilities(
                utils.vectorize(_buffer, fixed=False))

        # Novelty detection based on results euclidean distance
        if result[0][2] < 0.85:
            self.output.set(result[0][0])
        else:
            self.output.set("not recognized...")
        sleep(3)
        self.output.set("")
Example #21
    def _create_train_op(self, grads_and_vars, extra_grads_and_vars=[]):
        '''
    Args:
      grads_and_vars: gradients to apply and compute running average variance
      extra_grads_and_vars: gradients to apply (not used to compute average variance)
    '''
        # Variance summaries
        first_moment = U.vectorize(grads_and_vars, skip_none=True)
        second_moment = tf.square(first_moment)
        self.maintain_ema_ops.append(
            self.ema.apply([first_moment, second_moment]))

        # Add baseline losses
        if len(self.baseline_loss) > 0:
            mean_baseline_loss = tf.reduce_mean(tf.add_n(self.baseline_loss))
            extra_grads_and_vars += self.optimizer_class.compute_gradients(
                mean_baseline_loss, var_list=tf.get_collection('BASELINE'))

        # Ensure that all required tensors are computed before updates are executed
        extra_optimizer = tf.train.AdamOptimizer(learning_rate=10 *
                                                 self.hparams.learning_rate,
                                                 beta2=self.hparams.beta2)
        with tf.control_dependencies([
                tf.group(*[
                    g for g, _ in (grads_and_vars + extra_grads_and_vars)
                    if g is not None
                ])
        ]):

            # Filter out the P_COLLECTION variables if we're in eval mode
            if self.eval_mode:
                grads_and_vars = [(g, v) for g, v in grads_and_vars
                                  if v not in tf.get_collection(P_COLLECTION)]

            train_op = self.optimizer_class.apply_gradients(
                grads_and_vars, global_step=self.global_step)

            if len(extra_grads_and_vars) > 0:
                extra_train_op = extra_optimizer.apply_gradients(
                    extra_grads_and_vars)
            else:
                extra_train_op = tf.no_op()

            self.optimizer = tf.group(train_op, extra_train_op,
                                      *self.maintain_ema_ops)

        # per parameter variance
        variance_estimator = (self.ema.average(second_moment) -
                              tf.square(self.ema.average(first_moment)))
        self.grad_variance = tf.reduce_mean(variance_estimator)
Example #22
    def classify(self, _buffer, fixed):
        '''
        Classify the sign based on the input and display a message
        '''
        result = None

        # Classify fixed signs
        if fixed and self.nbFixed:
            result = self.nbFixed.probabilities(
                utils.vectorize(_buffer, fixed=True))

        # Classify gesture signs
        elif self.nbGesture:
            result = self.nbGesture.probabilities(
                utils.vectorize(_buffer, fixed=False))

        # Novelty detection based on the euclidean distance of the results
        if result[0][2] < 0.85:
            self.output.set(result[0][0])
        else:
            self.output.set("non reconnue ...")
        sleep(3)
        self.output.set("")
Example #23
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling, question_belong=question_belong)
    else:
#        logging.info('*' * 10 + ' Train')
#        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev, relabeling=args.relabeling, question_belong=question_belong)

#    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
#    word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, attention_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(dev_examples, word_dict, sort_by_len=not args.test_only, concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, args.batch_size, args.concat)
    dev_acc, n_examples, prediction, all_examples= eval_acc(test_fn, all_dev)
    
    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())
    
    alpha= attention_func(attention_fn, all_dev)
    
    if args.test_only:
        return dev_acc,n_examples, prediction, all_examples, alpha
Example #24
def test(model, data, act_data, emo_data):
    print('Test---------------------')
    model.eval()
    total, acc = 0, 0
    for i, (d, a, e) in tqdm(enumerate(zip(data, act_data, emo_data)),
                             total=len(data)):
        batch = vectorize(w2i, d, a, e)
        x = torch.stack([turn[0] for turn in batch], 0)
        act_labels = torch.stack([turn[1] for turn in batch], 0).squeeze(1)
        preds = model(x)
        acc += torch.sum(act_labels == torch.max(preds, 1)[1]).data[0]
        total += x.size(0)

        if i % 500 == 0:
            print('Acc {:.2f}%'.format(100 * acc / total))

    print('Final Acc {:.2f}%'.format(100 * acc / total))
Example #25
  def _create_train_op(self, grads_and_vars, extra_grads_and_vars=[]):
    '''
    Args:
      grads_and_vars: gradients to apply and compute running average variance
      extra_grads_and_vars: gradients to apply (not used to compute average variance)
    '''
    # Variance summaries
    first_moment = U.vectorize(grads_and_vars, skip_none=True)
    second_moment = tf.square(first_moment)
    self.maintain_ema_ops.append(self.ema.apply([first_moment, second_moment]))

    # Add baseline losses
    if len(self.baseline_loss) > 0:
      mean_baseline_loss = tf.reduce_mean(tf.add_n(self.baseline_loss))
      extra_grads_and_vars += self.optimizer_class.compute_gradients(
          mean_baseline_loss,
          var_list=tf.get_collection('BASELINE'))

    # Ensure that all required tensors are computed before updates are executed
    extra_optimizer = tf.train.AdamOptimizer(
        learning_rate=10*self.hparams.learning_rate,
        beta2=self.hparams.beta2)
    with tf.control_dependencies(
        [tf.group(*[g for g, _ in (grads_and_vars + extra_grads_and_vars) if g is not None])]):

      # Filter out the P_COLLECTION variables if we're in eval mode
      if self.eval_mode:
        grads_and_vars = [(g, v) for g, v in grads_and_vars
                          if v not in tf.get_collection(P_COLLECTION)]

      train_op = self.optimizer_class.apply_gradients(grads_and_vars,
                                                      global_step=self.global_step)

      if len(extra_grads_and_vars) > 0:
        extra_train_op = extra_optimizer.apply_gradients(extra_grads_and_vars)
      else:
        extra_train_op = tf.no_op()

      self.optimizer = tf.group(train_op, extra_train_op, *self.maintain_ema_ops)

    # per parameter variance
    variance_estimator = (self.ema.average(second_moment) -
        tf.square(self.ema.average(first_moment)))
    self.grad_variance = tf.reduce_mean(variance_estimator)
Example #26
def train(model, data, act_data, emo_data, start_epoch=0, n_epochs=20):
    print('Train---------------------')
    model.train()
    loss_fn = F.nll_loss
    for epoch in range(start_epoch, n_epochs):
        print('Epoch', epoch)
        indices = [i for i in range(len(data))]
        random.shuffle(indices)
        total, acc = 0, 0
        # for i, (d, a, e) in tqdm(enumerate(zip(data, act_data, emo_data)), total=len(data)):
        for i, idx in tqdm(enumerate(indices), total=len(indices)):
            d, a, e = data[idx], act_data[idx], emo_data[idx]
            batch = vectorize(w2i, d, a, e)
            x = torch.stack([turn[0] for turn in batch], 0)
            act_labels = torch.stack([turn[1] for turn in batch], 0).squeeze(1)
            preds = model(x)
            acc += torch.sum(act_labels == torch.max(preds, 1)[1]).data[0]
            total += x.size(0)
            loss = loss_fn(preds, act_labels)
            # print(loss.data[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 500 == 0:
                print('Epoch {}, Acc {:.2f}%, loss {:.3f}'.format(
                    epoch, 100 * acc / total, loss.data[0]))

        filename = 'ckpts/SimpleLSTM-Epoch-{}.model'.format(epoch)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            filename=filename)
Example #27
        dropout_keep_prob = graph.get_operation_by_name(
            "dropout_keep_prob").outputs[0]
        is_training = graph.get_operation_by_name("is_training").outputs[0]

        # Tensors we want to evaluate
        predictions = graph.get_operation_by_name(
            "output/predictions").outputs[0]

        # Generate batches for one epoch
        all_labels = []
        all_preds = []
        for batch in test.bacth_iter(FLAGS.batch_size,
                                     desc="Testing",
                                     shuffle=False):
            labels, docs = zip(*batch)
            padded_docs, _, _, _ = vectorize(docs)
            feed_dict = {
                inputs: padded_docs,
                # inputs_mask: padded_docs_mask,
                is_training: False,
                dropout_keep_prob: 1.0
            }
            preds = sess.run(predictions, feed_dict)
            all_labels = np.concatenate([all_labels, labels])
            all_preds = np.concatenate([all_preds, preds])

# Print accuracy
if all_labels is not None:
    correct_preds = float(sum(all_preds == all_labels))
    print("Total number of test examples: {}".format(len(all_labels)))
    print("Accuracy: {:g}".format(correct_preds / float(len(all_labels))))
Example #28
def train():
    word_dict = load_vocab(FLAGS.vocab_data)
    glove = load_glove("../glove.6B.{}d.txt".format(FLAGS.embedding_size),
                       FLAGS.embedding_size, word_dict)
    train = Dataset(filepath=FLAGS.train_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)
    valid = Dataset(filepath=FLAGS.valid_data,
                    num_class=FLAGS.num_class,
                    sequence_length=FLAGS.sequence_length)

    with tf.Graph().as_default():
        session_conf = tf.compat.v1.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.compat.v1.Session(config=session_conf)
        with sess.as_default():
            rcnn = TextRCNN(vocab_size=len(word_dict),
                            embedding_size=FLAGS.embedding_size,
                            sequence_length=FLAGS.sequence_length,
                            num_class=FLAGS.num_class,
                            cell_type=FLAGS.cell_type,
                            hidden_size=FLAGS.hidden_size,
                            pretrained_embeddings=glove,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define training procedure
            global_step = tf.compat.v1.Variable(0,
                                                name="global_step",
                                                trainable=False)
            train_op = tf.compat.v1.train.AdamOptimizer(
                FLAGS.learning_rate).minimize(rcnn.loss,
                                              global_step=global_step)
            acc, acc_op = tf.compat.v1.metrics.accuracy(
                labels=rcnn.labels,
                predictions=rcnn.predictions,
                name="metrics/acc")
            metrics_vars = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.LOCAL_VARIABLES, scope="metrics")
            metrics_init_op = tf.compat.v1.variables_initializer(
                var_list=metrics_vars)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.compat.v1.summary.scalar("loss", rcnn.loss)
            acc_summary = tf.compat.v1.summary.scalar("accuracy",
                                                      rcnn.accuracy)

            # Train summaries
            train_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.compat.v1.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Valid summaries
            valid_step = 0
            valid_summary_op = tf.compat.v1.summary.merge(
                [loss_summary, acc_summary])
            valid_summary_dir = os.path.join(out_dir, "summaries", "valid")
            valid_summary_writer = tf.compat.v1.summary.FileWriter(
                valid_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(),
                                             max_to_keep=FLAGS.num_checkpoints)

            # initialize all variables
            best_valid_acc = 0.0
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.local_variables_initializer())

            # training and validating loop
            for epoch in range(FLAGS.num_epoch):
                print('-' * 100)
                print('\n{}> epoch: {}\n'.format(
                    datetime.datetime.now().isoformat(), epoch))
                sess.run(metrics_init_op)
                # Training process
                for batch in train.bacth_iter(FLAGS.batch_size,
                                              desc="Training",
                                              shuffle=True):
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs, FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, summaries, loss, accuracy, _ = sess.run([
                        train_op, global_step, train_summary_op, rcnn.loss,
                        rcnn.accuracy, acc_op
                    ], feed_dict)
                    train_summary_writer.add_summary(summaries, step)

                print("\ntraining accuracy = {:.2f}\n".format(
                    sess.run(acc) * 100))

                sess.run(metrics_init_op)
                # Validating process
                for batch in valid.bacth_iter(FLAGS.batch_size,
                                              desc="Validating",
                                              shuffle=False):
                    valid_step += 1
                    labels, docs = zip(*batch)
                    padded_docs, _, _ = vectorize(docs, FLAGS.sequence_length)
                    feed_dict = {
                        rcnn.inputs: padded_docs,
                        rcnn.labels: labels,
                        rcnn.dropout_keep_prob: 1.0
                    }
                    summaries, loss, accuracy, _ = sess.run(
                        [valid_summary_op, rcnn.loss, rcnn.accuracy, acc_op],
                        feed_dict)
                    valid_summary_writer.add_summary(summaries,
                                                     global_step=valid_step)

                valid_acc = sess.run(acc) * 100
                print("\nvalidating accuracy = {:.2f}\n".format(valid_acc))

                # model checkpoint
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    print("current best validating accuracy = {:.2f}\n".format(
                        best_valid_acc))
                    path = saver.save(sess, checkpoint_prefix)
                    print("saved model checkpoint to {}\n".format(path))

            print("{} optimization finished!\n".format(
                datetime.datetime.now()))
            print("best validating accuracy = {:.2f}\n".format(best_valid_acc))
Example #29
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if not (args.test_only):
        logging.info('*' * 10 + ' All')
        all_examples = utils.load_data(args.all_file,
                                       100,
                                       relabeling=args.relabeling)
        dev_ratio = args.dev_ratio
        sample_index = np.arange(len(all_examples[0]))
        random.seed(1000)
        dev_index = random.sample(sample_index,
                                  int(dev_ratio * len(sample_index)))
        train_index = np.setdiff1d(sample_index, dev_index)
        dev_examples = tuple_part(all_examples, dev_index)
        train_examples = tuple_part(all_examples, train_index)
        #feature preprocessing
        train_fea_flat_np = FeaExtract(train_examples[-1])
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        train_fea_flat_np2, dev_fea_flat_np2 = Prepocessing_func(
            train_fea_flat_np,
            dev_fea_flat_np,
            varian_ratio_tol=args.pca_ratio)
        train_fea_merge = FeaMerge(train_fea_flat_np2, train_examples[-1])
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        train_examples = train_examples[:-1] + (train_fea_merge, )
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )
        args.num_train = len(train_examples[0])
    else:
        #        logging.info('*' * 10 + ' Train')
        #        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling)
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        dev_fea_flat_np2 = PrepocessingApply_func(dev_fea_flat_np)
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )

    args.num_dev = len(dev_examples[0])
    args.mea_num = dev_examples[4][0].shape[-1]

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y, dev_x4 = utils.vectorize(
        dev_examples,
        word_dict,
        sort_by_len=not args.test_only,
        concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, dev_x4,
                           args.batch_size, args.concat)
    dev_acc, prediction = eval_acc(test_fn, all_dev)

    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())

    best_dev_acc = dev_acc
    best_train_acc = 0
    if args.test_only:
        return dev_acc, best_train_acc
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)
    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y, train_x4 = utils.vectorize(
        train_examples, word_dict, concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_x3, train_y, train_x4,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3, mb_y,
                  mb_x4, mb_mask4) in enumerate(all_train):

            train_loss = train_fn(mb_x1, mb_mask1, mb_x3, mb_mask3, mb_y,
                                  mb_x4)
            #            if idx % 100 == 0:
            #            if epoch % 100 == 0:
            #                logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            #                logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' % (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                print([x.get_value() for x in params])
                print([x.get_value() for x in all_params])
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k]
                     for k in samples], [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples
                     for o in range(4)], [train_y[k] for k in samples],
                    [train_x4[k]
                     for k in samples], args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                train_acc, pred = eval_acc(test_fn, all_train)
                logging.info('train accuracy: %.2f %%' % train_acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, best_dev_acc))
                    best_train_acc = acc
                    logging.info(
                        'Best train accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, best_train_acc))
                    utils.save_params(
                        args.model_file,
                        all_params,
                        epoch=epoch,
                        n_updates=n_updates,
                    )

    return best_dev_acc, best_train_acc
Example #30
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       100,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    #word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)  # EMBEDDING
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(
        dev_examples,
        word_dict,
        sort_by_len=not args.test_only,
        concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, args.batch_size,
                           args.concat)
    dev_acc, pred = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y = utils.vectorize(train_examples,
                                                            word_dict,
                                                            concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_x3, train_y,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3,
                  mb_y) in enumerate(all_train):

            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3,
                                  mb_mask3, mb_y)
            if idx % 100 == 0:
                logging.info('#Examples = %d, max_len = %d' %
                             (len(mb_x1), mb_x1.shape[1]))
                logging.info(
                    'Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)'
                    % (epoch, idx, len(all_train), train_loss,
                       time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples(
                    [train_x1[k]
                     for k in samples], [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples
                     for o in range(4)], [train_y[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file,
                                      all_params,
                                      epoch=epoch,
                                      n_updates=n_updates)
Example #31
0
 def block_trunc_pad_zeroes(item, xlen=max_words_len):
     return np_pad_to_size(
         [vectorize(word, embeddings_model) for word in item[:xlen]],
         minsizes=(None, xlen, None))
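 # Hypothetical usage (assumes `embeddings_model` maps each word to a fixed-length
 # vector and `np_pad_to_size` zero-pads the stacked rows out to `xlen`):
 #   padded = block_trunc_pad_zeroes("the quick brown fox".split())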
Example #32
0
def main():

    start = timer()

    if (os.path.isfile("data/tweets" + str(max_example) + ".npy")
            and os.path.isfile("data/emojis" + str(max_example) + ".npy")):
        tweets = np.load("data/tweets" + str(max_example) + ".npy").tolist()
        emojis = np.load("data/emojis" + str(max_example) + ".npy").tolist()
    else:
        tweets, emojis = utils.load_data(path='data/final_train',
                                         max_example=max_example)
        np.save("data/tweets" + str(max_example) + ".npy", np.array(tweets))
        np.save("data/emojis" + str(max_example) + ".npy", np.array(emojis))

    if (os.path.isfile("data/dev_tweets" + str(max_dev_example) + ".npy") and
            os.path.isfile("data/dev_emojis" + str(max_dev_example) + ".npy")):
        dev_tweets = np.load("data/dev_tweets" + str(max_dev_example) +
                             ".npy").tolist()
        dev_emojis = np.load("data/dev_emojis" + str(max_dev_example) +
                             ".npy").tolist()
    else:
        dev_tweets, dev_emojis = utils.load_data(max_example=max_dev_example)
        np.save("data/dev_tweets" + str(max_dev_example) + ".npy",
                np.array(dev_tweets))
        np.save("data/dev_emojis" + str(max_dev_example) + ".npy",
                np.array(dev_emojis))

    start1 = timer()
    print(start1 - start)

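    # Build the vocabulary from the training tweets (the pretrained GloVe path below is commented out)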
    word_dict = utils.build_dict(tweets)
    # embeddings = utils.generate_embeddings(word_dict, dim=300, pretrained_path='data/glove.6B.300d.txt')
    embeddings = utils.generate_embeddings(word_dict,
                                           dim=300,
                                           pretrained_path=None)

    end0 = timer()
    print(end0 - start1)

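    # Convert tweets and emoji labels to index arrays using the shared word_dict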
    x, y = utils.vectorize(tweets, emojis, word_dict)
    dev_x, dev_y = utils.vectorize(dev_tweets, dev_emojis, word_dict)

    end1 = timer()
    print(end1 - end0)

    batch_size, input_size, hidden_size, output_size, layers = 32, 300, 200, 20, 1
    all_train = utils.generate_batches(x, y, batch_size=batch_size)
    all_dev = utils.generate_batches(dev_x, dev_y, batch_size=batch_size)

    end2 = timer()
    print(end2 - end1)

    # set the parameters
    # batch_size, input_size, hidden_size, output_size, layers = 64, 50, 200, 20, 1
    vocabulary_size = len(embeddings)

    if run_GRU:
        print("running GRU...")
        # initialize the model
        model = GRU_Classifier(vocabulary_size, input_size, hidden_size,
                               output_size, layers, run_BD_GRU)
        model.word_embeddings.weight.data = torch.FloatTensor(
            embeddings.tolist())
        if torch.cuda.is_available():
            model.cuda()  # moves all parameters, including word_embeddings.weight, to the GPU

        loss_function = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            loss_function.cuda()

        optimizer = optim.Adam(model.parameters(), lr=global_learning_rate)
        epoch_num = 500
        it = 0
        best_dev_acc = 0
        best_f1 = 0

        # model training
        for epoch in range(epoch_num):
            np.random.shuffle(all_train)
            for idx, (mb_x, mb_y, mb_lengths) in enumerate(all_train):
                # sort the input in descending order according to sentence length
                # This is required by nn.utils.rnn.pack_padded_sequence
                sorted_index = len_value_argsort(mb_lengths)
                mb_x = [mb_x[i] for i in sorted_index]
                mb_y = [mb_y[i] for i in sorted_index]
                mb_lengths = [mb_lengths[i] for i in sorted_index]

                print('#Examples = %d, max_seq_len = %d' %
                      (len(mb_x), len(mb_x[0])))
                mb_x = Variable(torch.from_numpy(np.array(mb_x,
                                                          dtype=np.int64)),
                                requires_grad=False)
                if torch.cuda.is_available():
                    mb_x = mb_x.cuda()

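                # forward pass; mb_x.t() makes the batch time-major (seq_len, batch) for the RNN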
                y_pred = model(mb_x.t(), mb_lengths)
                mb_y = Variable(torch.from_numpy(np.array(mb_y,
                                                          dtype=np.int64)),
                                requires_grad=False)
                if torch.cuda.is_available():
                    mb_y = mb_y.cuda()
                loss = loss_function(y_pred, mb_y)
                # print('epoch ', epoch, 'batch ', idx, 'loss ', loss.data[0])

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()
                it += 1

                if it % 100 == 0:  # every 100 updates, check dev accuracy
                    correct = 0
                    n_examples = 0
                    ground_truth = []
                    predicted = []
                    for idx, (d_x, d_y, d_lengths) in enumerate(all_dev):
                        ground_truth += d_y
                        n_examples += len(d_x)

                        sorted_index = len_value_argsort(d_lengths)
                        d_x = [d_x[i] for i in sorted_index]
                        d_y = [d_y[i] for i in sorted_index]
                        d_lengths = [d_lengths[i] for i in sorted_index]

                        d_x = Variable(torch.from_numpy(
                            np.array(d_x, dtype=np.int64)),
                                       requires_grad=False)
                        if torch.cuda.is_available():
                            d_x = d_x.cuda()

                        # use pytorch way to calculate the correct count
                        d_y = Variable(torch.from_numpy(
                            np.array(d_y, dtype=np.int64)),
                                       requires_grad=False)
                        if torch.cuda.is_available():
                            d_y = d_y.cuda()
                        y_pred = model(d_x.t(), d_lengths)
                        predicted += list(
                            torch.max(y_pred, 1)[1].view(d_y.size()).data)
                        correct += (torch.max(y_pred, 1)[1].view(
                            d_y.size()).data == d_y.data).sum()

                    dev_acc = float(correct) / n_examples  # float() avoids integer division
                    f1 = f1_score(ground_truth, predicted, average='macro')
                    print("Dev Accuracy: %f, F1 Score: %f" % (dev_acc, f1))
                    if f1 > best_f1:
                        best_f1 = f1
                        print("Best F1 Score: %f" % best_f1)
                        with open('./out/gru_best', 'w') as gru_output:
                            gru_output.write(str(ground_truth) + '\n')
                            gru_output.write(str(predicted) + '\n')
                            gru_output.write(str(best_f1) + ' ' + str(dev_acc))

                    if dev_acc > best_dev_acc:
                        best_dev_acc = dev_acc
                        print("Best Dev Accuracy: %f" % best_dev_acc)

    if run_LSTM:
        print("Running LSTM...")
        model = LSTM_Classifier(vocabulary_size, input_size, hidden_size,
                                output_size, layers, run_BD_LSTM)
        model.word_embeddings.weight.data = torch.FloatTensor(
            embeddings.tolist())
        if torch.cuda.is_available():
            model.cuda()  # moves all parameters, including word_embeddings.weight, to the GPU

        loss_function = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            loss_function.cuda()

        optimizer = optim.Adam(model.parameters(), lr=global_learning_rate)
        it = 0
        best_dev_acc = 0
        best_f1 = 0
        epoch_num = 500

        # train LSTM
        for epoch in range(epoch_num):
            np.random.shuffle(all_train)

            for idx, (mb_x, mb_y, mb_lengths) in enumerate(all_train):
                sorted_index = len_value_argsort(mb_lengths)
                mb_x = [mb_x[i] for i in sorted_index]
                mb_y = [mb_y[i] for i in sorted_index]
                mb_lengths = [mb_lengths[i] for i in sorted_index]
                print('#Examples = %d, max_seq_len = %d' %
                      (len(mb_x), len(mb_x[0])))

                mb_x = Variable(torch.from_numpy(np.array(mb_x,
                                                          dtype=np.int64)),
                                requires_grad=False)
                if torch.cuda.is_available():
                    mb_x = mb_x.cuda()

                y_pred = model(mb_x.t(), mb_lengths)
                mb_y = Variable(torch.from_numpy(np.array(mb_y,
                                                          dtype=np.int64)),
                                requires_grad=False)
                if torch.cuda.is_available():
                    mb_y = mb_y.cuda()

                loss = loss_function(y_pred, mb_y)
                # print('epoch ', epoch, 'batch ', idx, 'loss ', loss.data[0])

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()
                it += 1

                if it % 100 == 0:  # every 100 updates, check dev accuracy
                    correct = 0
                    n_examples = 0
                    ground_truth = []
                    predicted = []
                    for idx, (d_x, d_y, d_lengths) in enumerate(all_dev):
                        ground_truth += d_y
                        n_examples += len(d_x)

                        sorted_index = len_value_argsort(d_lengths)
                        d_x = [d_x[i] for i in sorted_index]
                        d_y = [d_y[i] for i in sorted_index]
                        d_lengths = [d_lengths[i] for i in sorted_index]

                        d_x = Variable(torch.from_numpy(
                            np.array(d_x, dtype=np.int64)),
                                       requires_grad=False)
                        if torch.cuda.is_available():
                            d_x = d_x.cuda()

                        d_y = Variable(torch.from_numpy(
                            np.array(d_y, dtype=np.int64)),
                                       requires_grad=False)
                        if torch.cuda.is_available():
                            d_y = d_y.cuda()
                        y_pred = model(d_x.t(), d_lengths)
                        predicted += list(
                            torch.max(y_pred, 1)[1].view(d_y.size()).data)
                        correct += (torch.max(y_pred, 1)[1].view(
                            d_y.size()).data == d_y.data).sum()

                    dev_acc = float(correct) / n_examples  # float() avoids integer division
                    f1 = f1_score(ground_truth, predicted, average='macro')
                    print("Dev Accuracy: %f, F1 Score: %f" % (dev_acc, f1))
                    if f1 > best_f1:
                        best_f1 = f1
                        print("Best F1 Score: %f" % best_f1)
                        with open('./out/lstm_best', 'w') as lstm_output:
                            lstm_output.write(str(ground_truth) + '\n')
                            lstm_output.write(str(predicted) + '\n')
                            lstm_output.write(str(best_f1) + ' ' + str(dev_acc))

                    if dev_acc > best_dev_acc:
                        best_dev_acc = dev_acc
                        print("Best Dev Accuracy: %f" % best_dev_acc)
Example #33
0
  def compute_gradient_moments(self, grads_and_vars):
    first_moment = U.vectorize(grads_and_vars, set_none_to_zero=True)
    second_moment = tf.square(first_moment)
    self.maintain_ema_ops.append(self.ema.apply([first_moment, second_moment]))

    return self.ema.average(first_moment), self.ema.average(second_moment)
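  # Hypothetical usage: m1, m2 = self.compute_gradient_moments(grads_and_vars)
  # yields EMA estimates of the flattened gradient and of its elementwise square.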
Example #34
0
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')

    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         100,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       100,
                                       relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file,
                                         relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file,
                                       args.max_dev,
                                       relabeling=args.relabeling)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
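    # Collect @entity markers (with <unk_entity> prepended at index 0) to form the answer label space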
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')] +
            train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size,
                                      args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')
    if args.prepare_model:
        return train_fn, test_fn, params

    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict,
                                                   entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc

    if args.test_only:
        return

    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(
        train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_l, train_y,
                             args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l,
                  mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' %
                         (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info(
                'Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)'
                % (epoch, idx, len(all_train), train_loss,
                   time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
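                # Periodically evaluate on a random training subset and the full dev set; save params on a new best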
                samples = sorted(
                    np.random.choice(args.num_train,
                                     min(args.num_train, args.num_dev),
                                     replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' %
                             eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info(
                        'Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                        % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file,
                                      params,
                                      epoch=epoch,
                                      n_updates=n_updates)
Example #35
0
def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')

    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev, relabeling=args.relabeling)

    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(set([w for w in word_dict.keys()
                              if w.startswith('@entity')] + train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')

    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc

    if args.test_only:
        return

    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(train_examples, word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0

    all_train = gen_examples(train_x1, train_x2, train_l, train_y, args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
                         (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1

            if n_updates % args.eval_iter == 0:
                samples = sorted(np.random.choice(args.num_train, min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' % eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)  # also used by the best-model check below
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%'
                                 % (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params, epoch=epoch, n_updates=n_updates)