# Note: this model_fn is defined inside a test method, so `self`, `is_training`,
# `use_trt`, `batch_size` and `model_dir` are captured from the enclosing scope.
def _ModelFn(features, labels, mode):
    if is_training:
        logits_out = self._BuildGraph(features)
    else:
        # Import the previously exported GraphDef for inference.
        graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
        logits_out = importer.import_graph_def(
            graph_def,
            input_map={INPUT_NODE_NAME: features},
            return_elements=[OUTPUT_NODE_NAME + ':0'],
            name='')[0]

    loss = losses.sparse_softmax_cross_entropy(labels=labels, logits=logits_out)
    summary.scalar('loss', loss)

    classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
    accuracy = metrics.accuracy(
        labels=labels, predictions=classes_out, name='acc_op')
    summary.scalar('accuracy', accuracy[1])

    if mode == ModeKeys.EVAL:
        return EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})
    elif mode == ModeKeys.TRAIN:
        optimizer = AdamOptimizer(learning_rate=1e-2)
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)
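# --- Usage sketch (added, not part of the original test) ----------------------
# _ModelFn above follows the tf.estimator model_fn contract, so it is
# presumably handed to an Estimator by the surrounding test harness. The
# snippet below is a minimal, hypothetical illustration of that wiring;
# `Estimator`, `_InputFn`, `model_dir` and `num_steps` are assumptions, not
# names taken from the original file.
estimator = Estimator(model_fn=_ModelFn, model_dir=model_dir)
if is_training:
    estimator.train(input_fn=_InputFn, steps=num_steps)
else:
    results = estimator.evaluate(input_fn=_InputFn)
    print('accuracy:', results['accuracy'])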
def __init__(self, inputs, network, check_point="dqn.ckpt"):
    self.saver = tf.train.Saver()
    self.summary_writer = tf.summary.FileWriter("/tmp/dqn")
    self.inputs = inputs
    self.network = network
    self.targets = tf.placeholder(tf.float32,
                                  shape=(None, self.output_shape[1]))

    summary_names = ["actions", "loss", "exploration_rate",
                     "fruits_eaten", "timesteps_survived"]
    self.summary_placeholders = {name: tf.placeholder(dtype=tf.float32)
                                 for name in summary_names}
    # self.summary_placeholders = [tf.placeholder(dtype=summary_variables[i].dtype)
    #                              for i in range(len(summary_names))]
    # summary_ops = [tf.assign(summary_variables[i], self.summary_placeholders[i])
    #                for i in range(len(summary_names))]
    summary = [tf.summary.histogram(summary_names[i],
                                    self.summary_placeholders[summary_names[i]])
               for i in range(1)]
    summary += [tf.summary.scalar(summary_names[i],
                                  self.summary_placeholders[summary_names[i]])
                for i in range(1, len(summary_names))]
    self.summary_ops = tf.summary.merge_all()

    self.loss = tf.losses.mean_squared_error(self.network, self.targets)
    optimizer = AdamOptimizer()
    self.train_step = optimizer.minimize(loss=self.loss)
    # with tf.colocate_with(global_step):
    #     self.update_op = tf.assign_add(global_step, 1)

    self.sess = tf.Session()
    self.summary_writer.add_graph(tf.get_default_graph())
    with self.sess.as_default():
        tf.global_variables_initializer().run()
    if os.path.exists(check_point):
        self.saver.restore(self.sess, check_point)
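# --- Usage sketch (added, not part of the original class) ---------------------
# One plausible way the per-episode statistics could be pushed through the
# summary placeholders defined above and written to the FileWriter. `agent`,
# `episode_stats`, `episode` and the example values are hypothetical names.
episode_stats = {"actions": 2.0, "loss": 0.03, "exploration_rate": 0.1,
                 "fruits_eaten": 4.0, "timesteps_survived": 120.0}
feed = {agent.summary_placeholders[name]: value
        for name, value in episode_stats.items()}
summary_str = agent.sess.run(agent.summary_ops, feed_dict=feed)
agent.summary_writer.add_summary(summary_str, global_step=episode)
agent.summary_writer.flush()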
class CNN_prior:

    def get_attention(self):
        self.pos_attention = tf.reduce_sum(
            tf.gradients(self.pre_max[:, 1], self.vectors)[0] * self.vectors, axis=2)
        self.pos_attention = softmax_padding(self.pos_attention, self.padding, axis=1)
        self.neg_attention = tf.reduce_sum(
            tf.gradients(self.pre_max[:, 0], self.vectors)[0] * self.vectors, axis=2)
        self.neg_attention = softmax_padding(self.neg_attention, self.padding, axis=1)

    def forward(self, v):
        vectors2d = tf.expand_dims(v, 1)  # None x 1 x 200 x 300 ... NHWC
        conv1 = tf.nn.conv2d(input=vectors2d, filter=self.f3,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A1 = tf.nn.leaky_relu(conv1 + self.b3)
        self.a1 = A1
        conv2 = tf.nn.conv2d(input=vectors2d, filter=self.f4,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A2 = tf.nn.leaky_relu(conv2 + self.b4)
        self.a2 = A2
        conv3 = tf.nn.conv2d(input=vectors2d, filter=self.f5,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A3 = tf.nn.leaky_relu(conv3 + self.b5)

        max_A1_train = tf.reshape(tf.squeeze(tf.reduce_max(A1, 2)), [-1, 50])  # None x 50
        max_A2_train = tf.reshape(tf.squeeze(tf.reduce_max(A2, 2)), [-1, 50])  # None x 50
        max_A3_train = tf.reshape(tf.squeeze(tf.reduce_max(A3, 2)), [-1, 50])  # None x 50
        concat = tf.concat([max_A1_train, max_A2_train, max_A3_train], axis=1)
        concat_drop = tf.nn.dropout(concat, keep_prob=self.dropout_rate)
        pre_max_true_drop = tf.matmul(concat_drop, self.relevance_weight) + self.relevance_bias
        rel = tf.nn.softmax(pre_max_true_drop, axis=1)

        sum_A1_train = tf.reshape(tf.squeeze(tf.reduce_sum(A1, 2)), [-1, 50])  # None x 50
        sum_A2_train = tf.reshape(tf.squeeze(tf.reduce_sum(A2, 2)), [-1, 50])  # None x 50
        sum_A3_train = tf.reshape(tf.squeeze(tf.reduce_sum(A3, 2)), [-1, 50])  # None x 50
        concat_sums = tf.concat([sum_A1_train, sum_A2_train, sum_A3_train], axis=1)
        pre_max_sum = tf.matmul(concat_sums, self.relevance_weight) + self.relevance_bias
        return rel, pre_max_true_drop, pre_max_sum

    def groupby(self, att):
        return ndmatmul(self.group_by, att)

    def __init__(self, word_vector_size):
        tf.reset_default_graph()
        self.vector_size = word_vector_size
        self.vectors = tf.placeholder(tf.float32, shape=(None, None, word_vector_size))
        self.user_terms = tf.placeholder(tf.float32, shape=(None, None))
        self.ut2 = tf.placeholder(tf.float32, shape=(None, None))
        self.group_by = tf.placeholder(tf.float32, shape=(None, None, None))
        self.padding = tf.placeholder(tf.float32, shape=(None, None))
        self.output = tf.placeholder(tf.float32, shape=(None, 1))
        self.dropout_rate = tf.placeholder(tf.float32)

        xavier = tf.contrib.layers.xavier_initializer()
        # 50 bi-gram, 50 tri-gram and 50 5-gram filters (widths 2, 3 and 5)
        filter_tri = tf.Variable(xavier((1, 2, word_vector_size, 50)), name="weight")
        bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f3 = filter_tri
        self.b3 = bias_tri
        filter_4 = tf.Variable(xavier((1, 3, word_vector_size, 50)), name="weight")
        bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f4 = filter_4
        self.b4 = bias_4
        filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)), name="weight")
        bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f5 = filter_5
        self.b5 = bias_5

        with tf.name_scope("relevance"):
            hidden = 150
            self.relevance_weight = tf.Variable(0.01 * xavier((hidden, 2)))
            self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
            self.relevance_attention_weight = tf.Variable(0.01 * xavier((100, 2)))
            self.relevance_attention_bias = tf.Variable(0.0 * xavier((1, 2)))

        rel, pre_max_true_dropped, pre_max_sum = self.forward(self.vectors)
        self.relevance = rel[:, 1]
        ut = tf.expand_dims(self.ut2, 2)  # NWC
        rel_masked, pre_max_true_masked_dropped, _ = self.forward(self.vectors * ut)
        self.rel_masked = rel_masked
        self.pre_max = pre_max_sum
        self.get_attention()

        # true_attention_error = 0.0
        att_reg = 0.0
        prediction_error = -tf.reduce_sum(
            (self.output * tf.log(rel[:, 1] + 10 ** -5, name="log2rel") +
             (1 - self.output) * tf.log(rel[:, 0] + 10 ** -5, name="log3rel")))

        # N, num_unique, text_length ; N, text_length
        pos_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.pos_attention, -1)),
                                   squeeze_dims=-1)
        neg_attention = tf.squeeze(tf.matmul(self.group_by, tf.expand_dims(self.neg_attention, -1)),
                                   squeeze_dims=-1)
        self.pos_att_grouped = pos_attention
        self.neg_att_grouped = neg_attention
        pos_heads = tf.reduce_sum(tf.multiply(pos_attention, self.user_terms), axis=1)
        neg_heads = tf.reduce_sum(tf.multiply(neg_attention, self.user_terms), axis=1)
        self.pos_heads = pos_heads

        attention_error = 0.0
        occlusion_error = 0.0
        if use_attention:
            attention_error += tf.reduce_sum(self.output * (pos_heads - 0.5) ** 2)
            att_reg = tf.reduce_sum(
                self.output * tf.nn.relu(self.pos_attention - att_max_value) +
                (1 - self.output) * tf.nn.relu(self.neg_attention - att_max_value))
            occlusion_error = -tf.reduce_sum(
                (self.output * tf.log(rel_masked[:, 1] + 10 ** -5, name="log2rel2") +
                 (1 - self.output) * tf.log(rel_masked[:, 0] + 10 ** -5, name="log3rel2")))
        self.att = attention_error
        self.error = (prediction_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * attention_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * occlusion_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) * att_reg)
        self.a = (tf.check_numerics(attention_error, message="att") +
                  tf.check_numerics(pos_heads, message="pos-heads") +
                  tf.check_numerics(neg_heads, message="neg-heads"))

        self.opt = AdamOptimizer()
        self.optimizer = self.opt.minimize(self.error)
        self.uncertainty = 1
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.n_trained = 0
        self.training = False

    def get_feed_dict(self, doc):
        return {self.vectors: np.array(doc.vectors, dtype=np.float32).reshape([1, -1, self.vector_size]),
                self.output: [[doc.class_ * 1]],
                self.user_terms: np.array(doc.user_terms, dtype=np.float32).reshape([1, -1]),
                self.padding: np.array([1 for i in doc.words]).reshape([1, -1])}

    def blow_up(self, mat, num_rows, num_cols):
        blowed_mat = [i + [0] * (num_cols - len(i)) for i in mat]
        x = ([0] * num_cols) * (num_rows - len(blowed_mat))
        if x:
            blowed_mat.append(x)
        return blowed_mat

    def get_feed_dict_multiple(self, docs):
        dp = 0.7 if self.training else 1
        maximum = max([len(doc.vectors) for doc in docs])
        maximum = max([maximum, 7])
        max_terms = max([len(doc.user_terms) for doc in docs])
        return {self.vectors: np.array(
                    [doc.vectors[:maximum] + [[0] * (self.vector_size)] * (maximum - len(doc.vectors[:maximum]))
                     for doc in docs]).reshape([-1, maximum, self.vector_size]),
                self.group_by: np.array([self.blow_up(doc.gb, max_terms, maximum) for doc in docs]),
                self.ut2: np.array(
                    [doc.ut2[:maximum] + [0] * (maximum - len(doc.ut2[:maximum]))
                     for doc in docs]).reshape([-1, maximum]),
                self.output: [[doc.class_ * 1] for doc in docs],
                self.user_terms: np.array(
                    [doc.user_terms[:max_terms] + [0] * (max_terms - len(doc.user_terms[:max_terms]))
                     for doc in docs]).reshape([-1, max_terms]),
                self.padding: np.array(
                    [[1] * len(doc.vectors[:maximum]) + [0] * (maximum - len(doc.vectors[:maximum]))
                     for doc in docs]).reshape([-1, maximum]),
                self.dropout_rate: dp}

    def load(self, filename):
        saver = tf.train.Saver()
        saver.restore(self.sess, filename)

    def train(self, docs, train_full=False):
        self.training = True
        self.sess.run(tf.global_variables_initializer())
        sess = self.sess
        print("====23")
        n = len(docs)
        epochs = 200
        if train_full:
            epochs = 10
        self.n_trained = n
        import random
        random.shuffle(docs)
        last_10 = [100] * 10
        prev_error = None
        for epoch in range(epochs):
            total_error = 0
            for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
                fd = self.get_feed_dict_multiple(doc_s)
                try:
                    sess.run(self.a, feed_dict=fd)
                except Exception as e:
                    print("check")
                _, error = sess.run([self.optimizer, self.error], feed_dict=fd)
                # print(x, y)
                # if epoch > 50 and x >= 0.5:
                #     print("ch")
                # print(error, error - x, x)
                total_error += error
            total_error = total_error / len(docs)
            # print(total_error)
            if train_full:
                saver = tf.train.Saver()
                saver.save(sess, "./{}.pkl".format(epoch))
            # print(total_error)
            if epoch > 10 and total_error > 4:
                # Training has not converged; restart from scratch.
                self.train(docs)
                return
            last_10.pop(0)
            last_10.append(total_error)
            if max(last_10) < 0.05:
                print("breaking")
                break
            print(total_error)
        self.training = False

    def run(self, docs):
        sess = self.sess
        for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
            fd = self.get_feed_dict_multiple(doc_s)
            try:
                l1 = sess.run([self.relevance, self.pos_att_grouped,
                               self.neg_att_grouped, self.pos_heads], feed_dict=fd)
            except Exception as e:
                print("here")
            for ind, doc in enumerate(doc_s):
                d = {
                    "rel": l1[0][ind],
                    "pos_att": l1[1][ind],
                    "neg_att": l1[2][ind],
                    "pos_heads": l1[3][ind]
                }
                doc.pred_class = 0 if d["rel"] < 0.5 else 1
                doc.parameters = d
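# --- Usage sketch (added, not part of the original code) ----------------------
# Rough illustration of how this CNN_prior variant appears to be driven:
# construct it with the word-vector dimensionality, then call train() and
# run() on document objects. `train_docs`, `test_docs` and the Doc-like
# attributes they carry (vectors, class_, user_terms, ut2, gb, words) are
# hypothetical; the real document class is defined elsewhere in the project.
model = CNN_prior(word_vector_size=300)
model.train(train_docs)   # list of Doc-like objects
model.run(test_docs)      # fills doc.pred_class and doc.parameters
for doc in test_docs:
    print(doc.pred_class, doc.parameters["rel"])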
class CNN_prior:

    def get_attribution(self):
        self.pos_attribution = tf.reduce_sum(
            tf.gradients(self.pre_max_sum[:, 1], self.vectors)[0] * self.vectors, axis=2)
        self.pos_attribution = softmax_padding(self.pos_attribution, self.padding, axis=1)
        self.neg_attribution = tf.reduce_sum(
            tf.gradients(self.pre_max_sum[:, 0], self.vectors)[0] * self.vectors, axis=2)
        self.neg_attribution = softmax_padding(self.neg_attribution, self.padding, axis=1)

    def forward(self, v):
        vectors2d = tf.expand_dims(v, 1)  # None x 1 x 200 x 300 ... NHWC
        conv1 = tf.nn.conv2d(input=vectors2d, filter=self.f3,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A1 = tf.nn.leaky_relu(conv1 + self.b3)
        self.a1 = A1
        conv2 = tf.nn.conv2d(input=vectors2d, filter=self.f4,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A2 = tf.nn.leaky_relu(conv2 + self.b4)
        self.a2 = A2
        conv3 = tf.nn.conv2d(input=vectors2d, filter=self.f5,
                             strides=[1, 1, 1, 1], padding="VALID")  # None x 1 x words x 50
        A3 = tf.nn.leaky_relu(conv3 + self.b5)

        max_A1_train = tf.reshape(tf.squeeze(tf.reduce_max(A1, 2)), [-1, 50])  # None x 50
        max_A2_train = tf.reshape(tf.squeeze(tf.reduce_max(A2, 2)), [-1, 50])  # None x 50
        max_A3_train = tf.reshape(tf.squeeze(tf.reduce_max(A3, 2)), [-1, 50])  # None x 50
        concat = tf.concat([max_A1_train, max_A2_train, max_A3_train], axis=1)
        concat_drop = tf.nn.dropout(concat, keep_prob=self.dropout_rate)
        pre_max_true_drop = tf.matmul(concat_drop, self.relevance_weight) + self.relevance_bias
        rel = tf.nn.softmax(pre_max_true_drop, axis=1)

        sum_A1_train = tf.reshape(tf.squeeze(tf.reduce_sum(A1, 2)), [-1, 50])  # None x 50
        sum_A2_train = tf.reshape(tf.squeeze(tf.reduce_sum(A2, 2)), [-1, 50])  # None x 50
        sum_A3_train = tf.reshape(tf.squeeze(tf.reduce_sum(A3, 2)), [-1, 50])  # None x 50
        concat_sums = tf.concat([sum_A1_train, sum_A2_train, sum_A3_train], axis=1)
        pre_max_sum = tf.matmul(concat_sums, self.relevance_weight) + self.relevance_bias
        return rel, pre_max_true_drop, pre_max_sum

    def __init__(self, word_vector_size):
        tf.reset_default_graph()
        self.vector_size = word_vector_size
        self.vectors = tf.placeholder(tf.float32, shape=(None, None, word_vector_size))
        self.user_terms = tf.placeholder(tf.float32, shape=(None, None))
        self.padding = tf.placeholder(tf.float32, shape=(None, None))
        self.output = tf.placeholder(tf.float32, shape=(None, 1))
        self.dropout_rate = tf.placeholder(tf.float32)

        xavier = tf.contrib.layers.xavier_initializer()
        # 50 tri-gram, 50 4-gram and 50 5-gram filters
        filter_tri = tf.Variable(xavier((1, 3, word_vector_size, 50)), name="weight")
        bias_tri = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f3 = filter_tri
        self.b3 = bias_tri
        filter_4 = tf.Variable(xavier((1, 4, word_vector_size, 50)), name="weight")
        bias_4 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f4 = filter_4
        self.b4 = bias_4
        filter_5 = tf.Variable(xavier((1, 5, word_vector_size, 50)), name="weight")
        bias_5 = tf.Variable(tf.zeros((1, 50)), name="bias")
        self.f5 = filter_5
        self.b5 = bias_5

        with tf.name_scope("relevance"):
            hidden = 150
            self.relevance_weight = tf.Variable(0.01 * xavier((hidden, 2)))
            self.relevance_bias = tf.Variable(0.0 * xavier((1, 2)))
            self.relevance_attention_weight = tf.Variable(0.01 * xavier((100, 2)))
            self.relevance_attention_bias = tf.Variable(0.0 * xavier((1, 2)))

        rel, pre_max_true_dropped, pre_max_sum = self.forward(self.vectors)
        self.relevance = rel[:, 1]

        ut = tf.expand_dims(self.user_terms, 2)  # NWC
        rel_masked, pre_max_true_masked_dropped, _ = self.forward(self.vectors * ut)
        self.rel_masked = rel_masked
        self.pre_max_sum = pre_max_sum
        self.get_attribution()

        prediction_error = -tf.reduce_sum(
            (self.output * tf.log(rel[:, 1] + 10 ** -5, name="log2rel") +
             (1 - self.output) * tf.log(rel[:, 0] + 10 ** -5, name="log3rel")))

        pos_heads = tf.reduce_sum(tf.multiply(self.pos_attribution, self.user_terms), axis=1)
        neg_heads = tf.reduce_sum(tf.multiply(self.neg_attribution, self.user_terms), axis=1)

        misattribution_error = 0.0
        corrective_error = 0.0
        att_reg = 0.0
        if use_attribution:
            misattribution_error += tf.reduce_sum(
                self.output * (pos_heads - 0.9) ** 2 +
                (1 - self.output) * (neg_heads - 0.9) ** 2)
            att_reg = tf.reduce_sum(
                self.output * tf.nn.relu(self.pos_attribution - att_max_value) +
                (1 - self.output) * tf.nn.relu(self.neg_attribution - att_max_value))
            corrective_error = -tf.reduce_sum(
                (self.output * tf.log(rel_masked[:, 1] + 10 ** -5, name="log2rel2") +
                 (1 - self.output) * tf.log(rel_masked[:, 0] + 10 ** -5, name="log3rel2")))

        self.error = (prediction_error +
                      tf.sign(tf.reduce_sum(self.user_terms)) *
                      (misattribution_error + corrective_error + att_reg))

        self.opt = AdamOptimizer()
        self.optimizer = self.opt.minimize(self.error)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.training = False

    def get_feed_dict_multiple(self, docs):
        dp = 0.7 if self.training else 1
        maximum = max([len(doc.vectors) for doc in docs] + [5])
        return {
            self.vectors: np.array([
                doc.vectors[:maximum] +
                [[0] * (self.vector_size)] * (maximum - len(doc.vectors[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum, self.vector_size]),
            self.output: [[doc.class_ * 1] for doc in docs],
            self.user_terms: np.array([
                doc.user_terms[:maximum] +
                [0] * (maximum - len(doc.user_terms[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum]),
            self.padding: np.array([
                [1] * len(doc.vectors[:maximum]) +
                [0] * (maximum - len(doc.vectors[:maximum]))
                for doc in docs
            ]).reshape([-1, maximum]),
            self.dropout_rate: dp
        }

    def train(self, docs):
        self.training = True
        # Re-initialize the machine during every training round
        self.sess.run(tf.global_variables_initializer())
        sess = self.sess
        print("====")
        epochs = 200  # maximum training epochs
        random.shuffle(docs)
        last_10 = [100] * 10
        for epoch in range(epochs):
            total_error = 0
            # Stochastic Gradient Descent (mini-batch size = 1) works best.
            for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
                fd = self.get_feed_dict_multiple(doc_s)
                _, error = sess.run([self.optimizer, self.error], feed_dict=fd)
                total_error += error
            total_error = total_error / len(docs)
            if epoch > 10 and total_error > 4:
                self.train(docs)
                return
            last_10.pop(0)
            last_10.append(total_error)
            if max(last_10) < 0.05:
                print("breaking")
                break
            print(total_error)
        self.training = False

    def run(self, docs):
        random.shuffle(docs)
        sess = self.sess
        num_correct = 0
        num_seen = 0
        for doc_s in [docs[i:i + 1] for i in range(0, len(docs), 1)]:
            fd = self.get_feed_dict_multiple(doc_s)
            l1 = sess.run([self.relevance, self.pos_attribution,
                           self.neg_attribution], feed_dict=fd)
            for ind, doc in enumerate(doc_s):
                d = {
                    "rel": l1[0][ind],
                    "pos_att": l1[1][ind],
                    "neg_att": l1[2][ind]
                }
                doc.pred_class = 0 if d["rel"] < 0.5 else 1
                doc.parameters = d
                num_correct += 1 * (doc.pred_class == doc.class_)
                num_seen += 1
                if num_seen % 1000 == 0:
                    print(num_correct / num_seen * 100)
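# --- Helper sketch (added; the real implementation lives elsewhere) -----------
# Both CNN_prior variants call softmax_padding() but its definition is not in
# these snippets. Given the (values, padding_mask, axis) call signature, one
# plausible reading is a softmax that ignores padded positions; the version
# below is an assumption, not the original code.
def softmax_padding(values, padding, axis=1):
    # padding is 1.0 for real tokens and 0.0 for padded positions.
    exp = tf.exp(values - tf.reduce_max(values, axis=axis, keepdims=True))
    exp = exp * padding
    return exp / (tf.reduce_sum(exp, axis=axis, keepdims=True) + 1e-9)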
class DBQA(DependencyParserBase):
    available_data_formats = {
        "word-based": NLPCC16DBQA,
        "character-based": NLPCC16DBQACharacterBased
    }
    default_data_format_name = "word-based"

    @classmethod
    def add_parser_arguments(cls, arg_parser):
        super(DBQA, cls).add_parser_arguments(arg_parser)
        group = arg_parser.add_argument_group(DBQA.__name__)
        group.add_argument("--external-embedding")
        group.add_argument("--batch-size", type=int, default=4096)
        group.add_argument("--embed-size", type=int, default=100)
        group.add_argument("--lstm-size", type=int, default=256)
        group.add_argument("--n-recur", type=int, default=2)
        group.add_argument("--use-bigram", type=int, default=1)
        group.add_argument("--input-keep-prob", type=int, default=1)
        group.add_argument("--recurrent-keep-prob", type=int, default=1)
        group.add_argument("--seed", type=int, default=42)
        group.add_argument("--steps", type=int, default=50000)
        group.add_argument("--merger-type", choices=["rnn", "cnn"], default="rnn")

    def __init__(self, options, data_train, session=None):
        self.statistics = DBQAStatistics.from_data(data_train)
        self.options = options
        self.optimizer = AdamOptimizer()
        self.global_step = tf.train.get_or_create_global_step()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        self.question_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.question_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_2d_pl = tf.placeholder(tf.int32, (None, None))
        self.wrong_answer_bigram_2d_pl = tf.placeholder(tf.int32, (None, None))

        self.network = PairwiseSimilarity(options, self.statistics)
        self.loss, self.accuracy = self.network.get_loss(
            self.question_2d_pl, self.question_bigram_2d_pl,
            self.answer_2d_pl, self.answer_bigram_2d_pl,
            self.wrong_answer_2d_pl, self.wrong_answer_bigram_2d_pl,
        )
        self.similarity = self.network.get_similarity(
            self.question_2d_pl, self.question_bigram_2d_pl,
            self.answer_2d_pl, self.answer_bigram_2d_pl)
        self.optimize_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step)

        if session is None:
            self.session = self.create_session()
            self.session.run(tf.global_variables_initializer())
        else:
            self.session = session
        self.random = Random(42)

    def create_session(self):
        config_proto = tf.ConfigProto()
        # config_proto.gpu_options.per_process_gpu_memory_fraction = self.options.per_process_gpu_memory_fraction
        return tf.Session(config=config_proto)

    def train(self, data_train):
        for questions_np, questions_bigram_np, \
                corrects_np, corrects_bigram_np, \
                wrongs_np, wrongs_bigram_np in generate_train_batches(
                    data_train, self.options.batch_size, self.random):
            step, loss, accuracy, _ = self.session.run(
                [self.global_step, self.loss, self.accuracy, self.optimize_op],
                {
                    self.question_2d_pl: questions_np,
                    self.question_bigram_2d_pl: questions_bigram_np,
                    self.answer_2d_pl: corrects_np,
                    self.answer_bigram_2d_pl: corrects_bigram_np,
                    self.wrong_answer_2d_pl: wrongs_np,
                    self.wrong_answer_bigram_2d_pl: wrongs_bigram_np
                })
            logger.info("Train: Step {}, loss {}, accuracy {}".format(
                step, loss, accuracy))

    @classmethod
    def repeat_train_and_validate(cls, data_train, data_devs, data_test, options):
        tf.set_random_seed(options.seed)
        parser = cls(options, data_train)
        for question in data_train:
            question.fill_ids(parser.statistics)
        for file_name, data_dev in data_devs.items():
            for question in data_dev:
                question.fill_ids(parser.statistics)
        while True:
            step = parser.session.run(parser.global_step)
            if step > options.steps:
                break
            parser.random.shuffle(data_train)
            parser.train(data_train)
            for file_name, data_dev in data_devs.items():
                try:
                    prefix, suffix = os.path.basename(file_name).rsplit(".", 1)
                except ValueError:
                    prefix = os.path.basename(file_name)
                    suffix = ""
                dev_output = os.path.join(
                    options.output, '{}_step_{}.{}'.format(prefix, step, suffix))
                scores = list(parser.predict(data_dev))
                with open(dev_output, "w") as f_output:
                    for score in scores:
                        f_output.write("{}\n".format(score))

    @classmethod
    def load(cls, prefix, new_options=None):
        pass

    def predict(self, data_dev):
        for questions_np, questions_bigram_np, \
                answer_np, answer_bigram_np in generate_predict_batches(
                    data_dev, self.options.batch_size):
            similarities = self.session.run(
                self.similarity, {
                    self.question_2d_pl: questions_np,
                    self.question_bigram_2d_pl: questions_bigram_np,
                    self.answer_2d_pl: answer_np,
                    self.answer_bigram_2d_pl: answer_bigram_np
                })
            for similarity in similarities:
                yield similarity

    def save(self, prefix):
        pass
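# --- Driver sketch (added, not part of the original module) -------------------
# A hypothetical illustration of how the options declared in
# add_parser_arguments might be assembled and handed to
# repeat_train_and_validate. The data-loading helper, file names and the
# output directory (read above as options.output, presumably declared by
# DependencyParserBase) are assumptions, not code from the original project.
import argparse

arg_parser = argparse.ArgumentParser()
DBQA.add_parser_arguments(arg_parser)
options = arg_parser.parse_args(
    ["--batch-size", "1024", "--steps", "20000", "--seed", "42"])
options.output = "outputs/"                      # hypothetical output directory
data_train = load_nlpcc16("dbqa.train")          # hypothetical loader
data_devs = {"dbqa.dev": load_nlpcc16("dbqa.dev")}
DBQA.repeat_train_and_validate(data_train, data_devs, None, options)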