Esempio n. 1
0
    def evaluate(self, session):
        """Consume the evaluation dataset and accumulate classification metrics.

        Repeatedly fetches gold label ids and logits from the iterator bound
        to this model, converts both to label strings via DatasetMaker, and
        folds them into a running metric dictionary. The dataset signals
        exhaustion by raising OutOfRangeError, at which point the accumulated
        metrics are returned.
        """
        metrics = {}
        try:
            while True:
                gold_ids, logit_values = session.run(
                    [self.labels, self.logits])
                pred_ids = self._logits_to_label_ids(logit_values)
                pred_labels = DatasetMaker.label_ids_to_labels(pred_ids)
                gold_labels = DatasetMaker.label_ids_to_labels(gold_ids)
                metrics = metric_collect(gold_labels, pred_labels, metrics)
        except tf.errors.OutOfRangeError:
            # Normal termination: the one-shot eval iterator is exhausted.
            return metrics
Esempio n. 2
0
 def _init_dataset_maker(self, load=False):
     """Prepare the char/tag mappings and publish their sizes into FLAGS.

     When *load* is False the mapping is built from the training data;
     otherwise it is restored from self.map_file. In both cases the mapping
     is then written out via save_mapping.
     """
     if load:
         DatasetMaker.load_mapping(self.map_file)
     else:
         DatasetMaker.generate_mapping(self.train_data)
     # save_mapping runs on both paths, so it is hoisted out of the branches.
     DatasetMaker.save_mapping(self.map_file, self.vocabulary_file)
     FLAGS.char_num = len(DatasetMaker.char_to_id)
     FLAGS.tag_num = len(DatasetMaker.tag_to_id)
 def _init_dataset_maker(self, load=False):
     """Prepare the char/label mappings and record their sizes in FLAGS.

     Builds the mapping from the training data (load=False) or restores it
     from self.map_file (load=True); only the chief task writes the mapping
     back to disk.
     """
     if load:
         DatasetMaker.load_mapping(self.map_file)
     else:
         DatasetMaker.generate_mapping(self.train_data)
     # Saving happens on both paths but only on the chief task.
     if self.is_chief:
         DatasetMaker.save_mapping(self.map_file, self.vocabulary_file)
     FLAGS.char_num = len(DatasetMaker.char_to_id)
     # n-gram vocabulary sizes are currently disabled:
     # FLAGS.gram2_num = len(DatasetMaker.gram2_to_id)
     # FLAGS.gram3_num = len(DatasetMaker.gram3_to_id)
     FLAGS.label_num = len(DatasetMaker.label_to_id)
Esempio n. 4
0
 def evaluate(self, session):
     """Consume the eval dataset and accumulate entity-tagging metrics.

     For the "softmax" loss the tag ids are decoded from the logits alone;
     for the "crf" loss the transition matrix is additionally fetched and
     passed to the decoder together with the sequence lengths. Iteration
     ends when the dataset iterator raises OutOfRangeError, at which point
     the collected metric dict is returned.
     """
     metrics = {}
     try:
         while True:
             pred_ids = None
             if self.loss_type == "softmax":
                 lengths, gold_ids, logit_values = session.run(
                     [self.char_len, self.tags, self.logits])
                 pred_ids = self._logits_to_tag_ids(logit_values)
             elif self.loss_type == "crf":
                 gold_ids, logit_values, lengths, transitions = session.run(
                     [self.tags, self.logits, self.char_len, self.trans])
                 pred_ids = self._logits_to_tag_ids(
                     logit_values, lengths, transitions)
             pred_tags = DatasetMaker.tag_ids_to_tags(pred_ids)
             gold_tags = DatasetMaker.tag_ids_to_tags(gold_ids)
             metrics = entity_metric_collect(gold_tags, pred_tags,
                                             lengths, metrics)
     except tf.errors.OutOfRangeError:
         # Normal termination: the eval iterator is exhausted.
         return metrics
Esempio n. 5
0
    def infer(self, session, file_handler):
        """Run inference over the dataset and write predictions.

        Fetches example ids and logits until the dataset iterator is
        exhausted, converts the logits to label strings, and writes each
        batch (id column concatenated with the predicted-label column)
        through *file_handler*.

        Raises:
            tf.errors.OutOfRangeError: propagated when the dataset is
                exhausted; callers treat it as the end-of-inference signal.
        """
        try:
            while True:
                data_ids, logits = session.run([self.ids, self.logits])
                predict_label_ids = self._logits_to_label_ids(logits)

                predict_labels = DatasetMaker.label_ids_to_labels(
                    predict_label_ids)
                file_handler.write(
                    np.concatenate([data_ids, predict_labels], axis=1))
        except tf.errors.OutOfRangeError:
            # Bare `raise` preserves the original traceback; `raise e` would
            # add a redundant re-raise frame (and loses the traceback on
            # Python 2).
            raise
Esempio n. 6
0
    def infer(self):
        # Run inference against an exported SavedModel: build the input
        # pipeline, restore the exported graph, and score one fetched batch.
        # NOTE(review): this snippet appears truncated — the final line opens
        # a triple-quoted string that is never closed in this view.
        self._init_dataset_maker()

        char_mapping_tensor, label_mapping_tensor = DatasetMaker.make_mapping_table_tensor(
        )
        # NOTE(review): positional args presumably are (data, batch_size=2,
        # mode="infer", num_shards=1, shard_index=0) — confirm against
        # DatasetMaker.make_dataset's signature.
        infer_dataset = DatasetMaker.make_dataset(char_mapping_tensor,
                                                  label_mapping_tensor,
                                                  self.infer_data, 2, "infer",
                                                  1, 0)
        tf.logging.info("The part {}/{} Training dataset is prepared!".format(
            1, 1))
        train_iter = tf.data.Iterator.from_structure(
            infer_dataset.output_types, infer_dataset.output_shapes)
        self.train_init_op = train_iter.make_initializer(infer_dataset)

        # Initialize the mapping lookup table and the dataset iterator before
        # any get_next() is evaluated.
        infer_session = self._create_session(None)
        infer_session.run(char_mapping_tensor.init)
        infer_session.run(self.train_init_op)

        # Restore the model exported under tag "sentiment-analysis", then look
        # up its input/output tensors by name in the default graph.
        tf.saved_model.loader.load(infer_session, ["sentiment-analysis"],
                                   self.model_path)
        graph = tf.get_default_graph()
        x_origin = graph.get_tensor_by_name("input_1:0")
        y = graph.get_tensor_by_name("dense_3/Sigmoid:0")

        x = train_iter.get_next()
        xx = infer_session.run(x)
        # Each fetched sequence is reversed before feeding — presumably the
        # exported model expects reversed token order; TODO confirm.
        xx = [line[::-1] for line in xx]
        print(xx)
        # Hard-coded sample token ids; currently unused by the feed below.
        s = [
            1268, 7, 468, 1, 428, 85, 44, 331, 76, 2, 60, 354, 2, 8, 68, 221,
            2, 4281, 270, 89, 667, 748, 249
        ]
        print(infer_session.run(y, {x_origin: xx}))
        tf.logging.info("Loading model from {}".format(self.model_path))
        """with tf.gfile.GFile("file_{}".format(self.task_index), "w") as f_w:
Esempio n. 7
0
 def _init_dataset_maker(self):
     """Restore the char/label mapping from disk and expose sizes via FLAGS."""
     DatasetMaker.load_mapping(self.map_file)
     # The mapping is only read here, never re-saved
     # (a save_mapping call existed previously and was disabled).
     char_vocab = DatasetMaker.char_to_id
     label_vocab = DatasetMaker.label_to_id
     FLAGS.char_num = len(char_vocab)
     FLAGS.label_num = len(label_vocab)
Esempio n. 8
0
    def train(self):
        """Single-machine training loop with a separate evaluation graph.

        Builds a train graph and an eval graph in two tf.Graph instances,
        runs epoch-based training, and every FLAGS.eval_step steps saves a
        temporary checkpoint from the train session, restores it into the
        eval session, and logs validation/test accuracy.
        """
        self._init_dataset_maker(False)

        # --- training graph: dataset pipeline + TrainModel -----------------
        train_graph = tf.Graph()
        with train_graph.as_default():
            train_char_mapping_tensor, train_label_mapping_tensor = DatasetMaker.make_mapping_table_tensor(
            )
            train_dataset = DatasetMaker.make_dataset(
                train_char_mapping_tensor, train_label_mapping_tensor,
                self.train_data, FLAGS.batch_size, "train", 1, 0)
            self.global_step = tf.train.get_or_create_global_step()
            train_iter = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            train_init_op = train_iter.make_initializer(train_dataset)
            train_model = TrainModel(train_iter, FLAGS, self.global_step)
            self.train_summary_op = train_model.merge_train_summary_op

        # --- evaluation graph: one iterator shared by valid/test datasets --
        eval_graph = tf.Graph()
        with eval_graph.as_default():
            eval_char_mapping_tensor, eval_label_mapping_tensor = DatasetMaker.make_mapping_table_tensor(
            )
            valid_dataset = DatasetMaker.make_dataset(
                eval_char_mapping_tensor, eval_label_mapping_tensor,
                self.valid_data, FLAGS.batch_size, "eval", 1, 0)
            tf.logging.info("The part 1/1 Validation dataset is prepared!")
            test_dataset = DatasetMaker.make_dataset(
                eval_char_mapping_tensor, eval_label_mapping_tensor,
                self.test_data, FLAGS.batch_size, "eval", 1, 0)
            tf.logging.info("The part 1/1 Test dataset is prepared!")

            # Valid and test share one iterator; the init ops switch between
            # them.
            eval_iter = tf.data.Iterator.from_structure(
                valid_dataset.output_types, valid_dataset.output_shapes)
            valid_init_op = eval_iter.make_initializer(valid_dataset)
            test_init_op = eval_iter.make_initializer(test_dataset)
            eval_model = EvalModel(eval_iter, FLAGS)

        # --- session setup: variables, lookup tables, dataset iterator -----
        train_session = self._create_session(train_graph)
        tf.logging.info("Created model with fresh parameters.")
        print_flags(FLAGS)
        save_flags(FLAGS, os.path.join(self.root_path, "config.pkl"))
        with train_session.graph.as_default():
            train_session.run(tf.global_variables_initializer())
        train_session.run(train_char_mapping_tensor.init)
        #train_session.run(train_gram2_mapping_tensor.init)
        #train_session.run(train_gram3_mapping_tensor.init)
        train_session.run(train_label_mapping_tensor.init)
        train_session.run(train_init_op)

        eval_session = self._create_session(eval_graph)
        eval_session.run(eval_char_mapping_tensor.init)
        #eval_session.run(eval_gram2_mapping_tensor.init)
        #eval_session.run(eval_gram3_mapping_tensor.init)
        eval_session.run(eval_label_mapping_tensor.init)

        tf.logging.info("Start training")
        loss = []
        steps_per_epoch = self.train_data_num // FLAGS.batch_size  # how many batches in an epoch
        for i in range(FLAGS.max_epoch):
            for j in range(steps_per_epoch):
                step, loss_value = train_model.train(train_session)
                loss.append(loss_value)
                # Periodically log the mean loss since the last report.
                if step % FLAGS.check_step == 0:
                    iteration = step // steps_per_epoch + 1
                    tf.logging.info(
                        "iteration:{} step:{}/{}, cross entropy loss:{:>9.6f}".
                        format(iteration, step % steps_per_epoch,
                               steps_per_epoch, np.mean(loss)))
                    loss = []

                # Periodically checkpoint from the train session and restore
                # into the eval session to measure validation/test accuracy.
                if step % FLAGS.eval_step == 0:
                    tf.logging.info(
                        "Evaluate Validation Dataset and Test Dataset in step: {}"
                        .format(step))
                    train_model.saver.save(
                        train_session,
                        os.path.join(self.log_dir, "temp_model.ckpt"))
                    tf.logging.info("Saving model parameters in {}".format(
                        os.path.join(self.log_dir, "temp_model.ckpt")))

                    eval_model.saver.restore(
                        eval_session,
                        os.path.join(self.log_dir, "temp_model.ckpt"))
                    tf.logging.info("Loading model from {}".format(
                        os.path.join(self.log_dir, "temp_model.ckpt")))
                    validation_accuracy = self._eval_performance(
                        eval_session, eval_model, "validation", valid_init_op)
                    test_accuracy = self._eval_performance(
                        eval_session, eval_model, "test", test_init_op)
                    eval_model.save_dev_test_summary(self.summary_writer,
                                                     eval_session,
                                                     validation_accuracy,
                                                     test_accuracy, step)
Esempio n. 9
0
    def train(self):
        """Distributed training entry point (between-graph replication).

        Behaviour depends on self.job_name:
          * "ps"     — parameter server: blocks on server.join().
          * "worker" — builds its shard of the training pipeline and runs
                       optimisation steps until the session stops.
          * "chief"  — builds the same train graph (so variables line up
                       with the workers) plus a separate eval graph, then
                       periodically checkpoints, restores into the eval
                       model, and logs validation/test accuracy.

        Fixes vs. previous revision: `_eval_performance` now receives the
        `eval_model` instance (the class `EvalModel` was being passed), and
        `traceback.format_exc()` is called without arguments (its optional
        parameter is an int frame limit, not an exc_info tuple).
        """
        if self.job_name == "ps":
            with tf.device("/cpu:0"):
                self.server.join()
                return

        self._init_dataset_maker(False)
        train_init_op = None
        valid_init_op = None
        test_init_op = None
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device=self.worker_prefix, cluster=self.cluster)):
            self.global_step = tf.train.get_or_create_global_step()
            if self.job_name == "worker":
                train_dataset = DatasetMaker.make_dataset(
                    self.train_data, FLAGS.batch_size, "train",
                    self.num_worker, self.task_index)
                tf.logging.info(
                    "The part {}/{} Training dataset is prepared!".format(
                        self.task_index + 1, self.num_worker))
                train_iter = tf.data.Iterator.from_structure(
                    train_dataset.output_types, train_dataset.output_shapes)
                train_init_op = train_iter.make_initializer(train_dataset)

                train_model = TrainModel(train_iter, FLAGS, self.global_step)

            elif self.job_name == "chief":
                # build same train graph to synchronize model parameters
                train_dataset = DatasetMaker.make_dataset(
                    self.train_data, FLAGS.batch_size, "train",
                    self.num_worker, self.task_index)
                train_iter = tf.data.Iterator.from_structure(
                    train_dataset.output_types, train_dataset.output_shapes)
                train_model = TrainModel(train_iter, FLAGS, self.global_step)
                self.train_summary_op = train_model.merge_train_summary_op

                # build test graph of same structure but different name scope
                # restore model from train checkpoint, and avoid its updating
                # during validation
                eval_graph = tf.Graph()
                with eval_graph.as_default():
                    valid_dataset = DatasetMaker.make_dataset(
                        self.valid_data, FLAGS.batch_size, "eval", 1, 0)
                    tf.logging.info(
                        "The part 1/1 Validation dataset is prepared!")
                    test_dataset = DatasetMaker.make_dataset(
                        self.test_data, FLAGS.batch_size, "eval", 1, 0)
                    tf.logging.info("The part 1/1 Test dataset is prepared!")

                    # Valid and test share one iterator; init ops switch
                    # between them.
                    eval_iter = tf.data.Iterator.from_structure(
                        valid_dataset.output_types,
                        valid_dataset.output_shapes)
                    valid_init_op = eval_iter.make_initializer(valid_dataset)
                    test_init_op = eval_iter.make_initializer(test_dataset)
                    eval_model = EvalModel(eval_iter, FLAGS, "eval_graph")

        with self._create_session_wrapper(retries=10) as sess:
            try:
                if self.job_name == "worker":
                    DatasetMaker.init_mapping_table_tensor(sess)
                    sess.run(train_init_op)

                    step = 0
                    while not sess.should_stop():
                        global_step_val, loss_value = train_model.train(sess)
                        if (step + 1) % self.check_step == 0:
                            epoch = (global_step_val *
                                     FLAGS.batch_size) // self.train_data_num
                            tf.logging.info(
                                "Job-{}:Worker-{}-----Epoch:{}-Local_Step/Global_Step:{}/{}:Loss is {:.2f}"
                                .format(self.job_name, self.task_index, epoch,
                                        step, global_step_val, loss_value))
                        step += 1
                elif self.job_name == "chief":
                    tf.logging.info("Created model with fresh parameters.")
                    self._print_flags(FLAGS)
                    sess.run(tf.global_variables_initializer())
                    DatasetMaker.init_mapping_table_tensor(sess)
                    # record top N model's performance
                    while True:
                        time.sleep(2)
                        global_step_val = sess.run(self.global_step)
                        if (global_step_val + 1) % self.eval_step == 0:
                            tf.logging.info(
                                "Evaluate Validation Dataset and Test Dataset in step: {}"
                                .format(global_step_val))
                            train_model.saver.save(
                                sess,
                                self.log_dir,
                                latest_filename="temp",
                                global_step=self.global_step)
                            ckpt = tf.train.get_checkpoint_state(
                                self.log_dir, latest_filename="temp")
                            tf.logging.info(
                                "Saving model parameters in {}".format(
                                    ckpt.model_checkpoint_path))

                            eval_model.saver.restore(
                                sess, ckpt.model_checkpoint_path)
                            tf.logging.info("Loading model from {}".format(
                                ckpt.model_checkpoint_path))
                            # Pass the eval_model *instance*, not the
                            # EvalModel class, mirroring the single-machine
                            # trainer's calls.
                            validation_accuracy = self._eval_performance(
                                sess, eval_model, "validation", valid_init_op)
                            test_accuracy = self._eval_performance(
                                sess, eval_model, "test", test_init_op)
                            eval_model.save_dev_test_summary(
                                self.summary_writer, sess, validation_accuracy,
                                test_accuracy, global_step_val)
            except tf.errors.OutOfRangeError as e:
                # format_exc() takes an optional int frame limit; passing
                # sys.exc_info() was incorrect.
                exc_info = traceback.format_exc()
                msg = 'Out of range error:{}\n{}'.format(e, exc_info)
                tf.logging.warn(msg)
                tf.logging.info('Done training -- step limit reached')
    def train(self):
        """Distributed training entry point (chief only monitors progress).

        Behaviour depends on self.job_name:
          * "ps"     — parameter server: blocks on server.join().
          * "worker" — runs optimisation steps on its dataset shard until
                       the monitored session stops.
          * "chief"  — prints/saves FLAGS and periodically logs the global
                       step; no evaluation is performed in this variant.

        Non-chief tasks sleep briefly so the chief can finish writing the
        shared mapping files before they are loaded.

        Fix vs. previous revision: `traceback.format_exc()` is called
        without arguments — its optional parameter is an int frame limit,
        not an exc_info tuple.
        """
        if self.job_name == "ps":
            with tf.device("/cpu:0"):
                self.server.join()
                return
        if not self.is_chief:
            time.sleep(20)
        self._init_dataset_maker(True)
        ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
            self.num_ps)
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device=self.worker_prefix,
                    cluster=self.cluster,
                    ps_strategy=ps_strategy)):
            self.global_step = tf.train.get_or_create_global_step()
            char_mapping_tensor, label_mapping_tensor = DatasetMaker.make_mapping_table_tensor(
            )

            train_dataset = DatasetMaker.make_dataset(
                char_mapping_tensor, label_mapping_tensor, self.train_data,
                FLAGS.batch_size, "train", self.num_worker, self.task_index)
            tf.logging.info(
                "The part {}/{} Training dataset is prepared!".format(
                    self.task_index + 1, self.num_worker))
            train_iter = tf.data.Iterator.from_structure(
                train_dataset.output_types, train_dataset.output_shapes)
            self.train_init_op = train_iter.make_initializer(train_dataset)

            train_model = TrainModel(train_iter, FLAGS, self.global_step)
            self.optimizer = train_model.optimizer
            self.train_summary_op = train_model.merge_train_summary_op

        with self._create_session_wrapper(retries=10) as sess:
            try:
                if self.job_name == "worker":
                    step = 0
                    while not sess.should_stop():
                        global_step_val, loss_value = train_model.train(sess)
                        if (step + 1) % self.check_step == 0:
                            # NOTE(review): epoch/progress are derived from
                            # the *local* step here (the sibling trainer uses
                            # the global step) — confirm this is intentional.
                            epoch = ((step + 1) *
                                     FLAGS.batch_size) // self.train_data_num
                            tf.logging.info(
                                "Job-{}:Worker-{}-----Local_Step/Global_Step:{}/{}:Loss is {:.4f}"
                                .format(self.job_name, self.task_index, step,
                                        global_step_val, loss_value))
                            tf.logging.info(
                                "Epoch:{}-Processed {}/{} data".format(
                                    epoch, (step + 1) * FLAGS.batch_size %
                                    self.train_data_num, self.train_data_num))
                        step += 1
                elif self.job_name == "chief":
                    print_flags(FLAGS, True)
                    save_flags(FLAGS, os.path.join(self.root_path,
                                                   "config.pkl"), True)
                    tf.logging.info("Waiting for training...")
                    # record top N model's performance
                    while True:
                        time.sleep(5)
                        global_step_val = sess.run(self.global_step)
                        tf.logging.info(
                            "Global step is {}".format(global_step_val))
            except tf.errors.OutOfRangeError as e:
                # format_exc() takes an optional int frame limit; passing
                # sys.exc_info() was incorrect.
                exc_info = traceback.format_exc()
                msg = 'Out of range error:{}\n{}'.format(e, exc_info)
                tf.logging.warn(msg)
                tf.logging.info('Done training -- step limit reached')