Example 1
    def __construct_alias_table(self):
        """Construct alias table for all words.
        """
        logger.info("Construct alias table for alias sampling method.")
        vocab_size = self.__model.vocab_size()
        self.__topic_indexes = [[] for _ in range(vocab_size)]
        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
        self.__prob_sum = np.zeros(vocab_size)

        # Construct each word's alias table (prior is not included).
        for i in tqdm(range(vocab_size)):
            dist = []
            prob_sum = 0
            for key in self.__model.word_topic(i):
                topic_id = key
                word_topic_count = self.__model.word_topic(i)[key]
                topic_sum = self.__model.topic_sum_value(topic_id)

                self.__topic_indexes[i].append(topic_id)
                q = word_topic_count / (topic_sum + self.__model.beta_sum())
                dist.append(q)
                prob_sum += q
            self.__prob_sum[i] = prob_sum
            if len(dist) > 0:
                dist = np.array(dist, dtype=np.float64)
                self.__alias_tables[i].initialize(dist)

        # Build prior parameter beta's alias table.
        beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum())
        self.__beta_prior_sum = np.sum(beta_dist)
        self.__beta_alias.initialize(beta_dist)
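Note: the VoseAlias class used above is not shown. As a reference, a minimal, hypothetical sketch of Vose's alias method (which the class name suggests, and which gives O(1) sampling from a discrete distribution) could look like this; it is not the project's actual implementation:

import random

class VoseAliasSketch:
    """Minimal sketch of Vose's alias method for O(1) discrete sampling."""

    def initialize(self, dist):
        # Scale each probability by n so every bucket ideally holds mass 1.
        n = len(dist)
        total = float(sum(dist))
        scaled = [p * n / total for p in dist]
        self.prob, self.alias = [0.0] * n, [0] * n
        small = [i for i, p in enumerate(scaled) if p < 1.0]
        large = [i for i, p in enumerate(scaled) if p >= 1.0]
        # Pair an under-full bucket with an over-full one until all are level.
        while small and large:
            s, l = small.pop(), large.pop()
            self.prob[s] = scaled[s]
            self.alias[s] = l
            scaled[l] -= 1.0 - scaled[s]
            (small if scaled[l] < 1.0 else large).append(l)
        for i in small + large:
            self.prob[i] = 1.0

    def sample(self):
        # Pick a bucket uniformly, then a biased coin chooses it or its alias.
        i = random.randrange(len(self.prob))
        return i if random.random() < self.prob[i] else self.alias[i]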
Example 2
    def predict(self, images=[], paths=[]):
        """
        Get the text box in the predicted images.
        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
            paths (list[str]): The paths of images. If paths not images
        Returns:
            res (list): The result of text detection box and save path of images.
        """

        if images != [] and isinstance(images, list) and paths == []:
            predicted_data = images
        elif images == [] and isinstance(paths, list) and paths != []:
            predicted_data = self.read_images(paths)
        else:
            raise TypeError("The input data is inconsistent with expectations.")

        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."

        all_results = []
        for img in predicted_data:
            if img is None:
                logger.info("error in loading image")
                all_results.append([])
                continue
            dt_boxes, elapse = self.text_detector(img)
            logger.info("Predict time : {}".format(elapse))

            rec_res_final = []
            for dno in range(len(dt_boxes)):
                rec_res_final.append({
                    'text_region': dt_boxes[dno].astype(np.int32).tolist()
                })
            all_results.append(rec_res_final)
        return all_results
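A hypothetical usage sketch, assuming detector is an instance of the module above and "test.jpg" is a made-up path:

import cv2

img = cv2.imread("test.jpg")              # ndarray with shape [H, W, C]
results = detector.predict(images=[img])  # or: detector.predict(paths=["test.jpg"])
for boxes in results:
    for box in boxes:
        print(box["text_region"])         # corner points of one detected text box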
Example 3
    def _predictor(self, args, exe, place):
        predictor_scope = fluid.Scope()
        with fluid.scope_guard(predictor_scope):
            # Use fresh programs (mirroring _extractor) instead of the global
            # default programs, which would otherwise be mutated.
            predictor_startup_prog = fluid.Program()
            predictor_main_prog = fluid.Program()
            with fluid.program_guard(predictor_main_prog,
                                     predictor_startup_prog):
                # parse config
                predictor_config = parse_config(args.predictor_config)
                predictor_infer_config = merge_configs(predictor_config,
                                                       'infer', vars(args))

                predictor_model = models.get_model(
                    "AttentionLSTM", predictor_infer_config, mode='infer')
                predictor_model.build_input(use_dataloader=False)
                predictor_model.build_model()
                predictor_feeds = predictor_model.feeds()
                predictor_outputs = predictor_model.outputs()

                exe.run(predictor_startup_prog)

                logger.info('load lstm weights from {}'.format(
                    args.predictor_weights))
                predictor_model.load_test_weights(exe, args.predictor_weights,
                                                  predictor_main_prog)

                predictor_feeder = fluid.DataFeeder(
                    place=place, feed_list=predictor_feeds)
                predictor_fetch_list = predictor_model.fetches()
        return predictor_main_prog, predictor_fetch_list, predictor_feeder, predictor_scope
Example 4
    def __init__(self, sub_dataset='SST-2'):
        # sub_dataset : CoLA, MNLI, MRPC, QNLI, QQP, RTE, SST-2, STS-B
        if sub_dataset not in [
                'CoLA', 'MNLI', 'MNLI_m', 'MNLI_mm', 'MRPC', 'QNLI', 'QQP',
                'RTE', 'SST-2', 'STS-B'
        ]:
            raise ValueError(
                sub_dataset +
                " is not in the GLUE benchmark. Please check the dataset name.")
        self.mismatch = False
        if sub_dataset == 'MNLI_mm':
            sub_dataset = 'MNLI'
            self.mismatch = True
        elif sub_dataset == 'MNLI_m':
            sub_dataset = 'MNLI'
        self.sub_dataset = sub_dataset
        self.dataset_dir = os.path.join(DATA_HOME, "glue_data")

        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_dev_examples()
        self._load_test_examples()
        self._load_predict_examples()
Example 5
    def encode(self, text):
        if len(self.serving_list) == 0:
            logger.error('No match server.')
            return -1
        if not isinstance(text, list):
            raise TypeError('Only lists are supported.')
        request_msg = self.prepare_data(text)

        response_msg = self.request_server(request_msg)
        retry = 0
        while isinstance(response_msg, str) and response_msg == 'retry':
            if retry < self.retry:
                retry += 1
                logger.info('Retrying with another server')
                response_msg = self.request_server(request_msg)
            else:
                logger.error('Request failed after {} times retry'.format(
                    self.retry))
                break
        if isinstance(response_msg, str):
            # All retries failed; there is no parsable response.
            return -1
        result = []
        for msg in response_msg["instances"]:
            for sample in msg["instances"]:
                result.append(sample["values"])

        return result
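A hypothetical usage sketch; client stands for an instance of the serving client above, and the exact per-sample format depends on the configured reader:

texts = [["Hello world"], ["PaddleHub serving"]]
embeddings = client.encode(texts)
if embeddings != -1:
    print("%d samples encoded" % len(embeddings))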
Example 6
def finetune(args):
    # Load the PaddleHub pretrained model (mobilenet by default)
    module = hub.Module(name=args.module)
    input_dict, output_dict, program = module.context(trainable=True)

    # Download the dataset and use ImageClassificationReader to read it
    dataset = hub.dataset.Flowers()
    data_reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)

    # The feature map output by the pretrained network
    feature_map = output_dict["feature_map"]

    img = input_dict["image"]
    feed_list = [img.name]

    # Select the finetune strategy and set up the run config
    strategy = hub.DefaultFinetuneStrategy(learning_rate=args.learning_rate)
    config = hub.RunConfig(
        use_cuda=True,
        num_epoch=args.epochs,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Construct transfer learning network
    task = hub.ImageClassifierTask(
        data_reader=data_reader,
        feed_list=feed_list,
        feature=feature_map,
        num_classes=dataset.num_labels,
        config=config)

    # Optionally load a model from the given model path
    if args.model_path != "":
        with task.phase_guard(phase="train"):
            task.init_if_necessary()
            task.load_parameters(args.model_path)
            logger.info("PaddleHub has loaded model from %s" % args.model_path)

    # Finetune by PaddleHub's API
    task.finetune()
    # Evaluate by PaddleHub's API
    run_states = task.eval()
    # Get acc score on dev
    eval_avg_score, eval_avg_loss, eval_run_speed = task._calculate_metrics(
        run_states)

    # Move ckpt/best_model to the defined saved parameters directory
    best_model_dir = os.path.join(config.checkpoint_dir, "best_model")
    if is_path_valid(args.saved_params_dir) and os.path.exists(best_model_dir):
        shutil.copytree(best_model_dir, args.saved_params_dir)
        shutil.rmtree(config.checkpoint_dir)

    # acc on dev will be used by auto finetune
    hub.report_final_result(eval_avg_score["acc"])
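A minimal driver sketch for the function above; the argument names are inferred from the attributes finetune reads off args, so treat them as assumptions:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--module", type=str, default="mobilenet_v2_imagenet")
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--epochs", type=int, default=1)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--checkpoint_dir", type=str, default="ckpt_flowers")
    parser.add_argument("--model_path", type=str, default="")
    parser.add_argument("--saved_params_dir", type=str, default="")
    finetune(parser.parse_args())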
Example 7
    def __init__(self,
                 log_interval=10,
                 eval_interval=100,
                 use_pyreader=False,
                 use_data_parallel=False,
                 save_ckpt_interval=None,
                 use_cuda=True,
                 checkpoint_dir=None,
                 num_epoch=1,
                 batch_size=32,
                 enable_memory_optim=True,
                 strategy=None):
        """ Construct finetune Config """
        self._log_interval = log_interval
        self._eval_interval = eval_interval
        self._save_ckpt_interval = save_ckpt_interval
        self._use_cuda = use_cuda
        self._num_epoch = num_epoch
        self._batch_size = batch_size
        self._use_pyreader = use_pyreader
        self._use_data_parallel = use_data_parallel
        if strategy is None:
            self._strategy = DefaultStrategy()
        else:
            self._strategy = strategy
        self._enable_memory_optim = enable_memory_optim
        if checkpoint_dir is None:
            now = int(time.time())
            time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
            self._checkpoint_dir = "ckpt_" + time_str
        else:
            self._checkpoint_dir = checkpoint_dir
        logger.info("Checkpoint dir: {}".format(self._checkpoint_dir))
Example 8
    def prepare_data(self, text):
        self.batch_size = len(text)
        data_generator = self.reader.data_generator(
            batch_size=self.batch_size, phase='predict', data=text)
        request_msg = ""
        for run_step, batch in enumerate(data_generator(), start=1):
            request = []
            token_list = batch[0][0].reshape(-1).tolist()
            pos_list = batch[0][1].reshape(-1).tolist()
            sent_list = batch[0][2].reshape(-1).tolist()
            mask_list = batch[0][3].reshape(-1).tolist()
            for si in range(self.batch_size):
                instance_dict = {}
                instance_dict["token_ids"] = token_list[si * self.max_seq_len:(
                    si + 1) * self.max_seq_len]
                instance_dict["sentence_type_ids"] = sent_list[
                    si * self.max_seq_len:(si + 1) * self.max_seq_len]
                instance_dict["position_ids"] = pos_list[si * self.max_seq_len:(
                    si + 1) * self.max_seq_len]
                instance_dict["input_masks"] = mask_list[si * self.max_seq_len:(
                    si + 1) * self.max_seq_len]
                request.append(instance_dict)

            request = {"instances": request}
            request["max_seq_len"] = self.max_seq_len
            request["feed_var_names"] = self.feed_var_names
            request_msg = ujson.dumps(request)
            if self.show_ids:
                logger.info(request_msg)

        return request_msg
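For reference, the assembled request has roughly this shape (values are illustrative, max_seq_len is shortened to 4, and feed_var_names mirrors self.feed_var_names):

import ujson

example_request = {
    "instances": [{
        "token_ids": [101, 2054, 102, 0],
        "sentence_type_ids": [0, 0, 0, 0],
        "position_ids": [0, 1, 2, 3],
        "input_masks": [1, 1, 1, 0],
    }],
    "max_seq_len": 4,
    "feed_var_names": ["token_ids"],
}
request_msg = ujson.dumps(example_request)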
Example 9
    def _convert_examples_to_records(self, examples, phase):
        """
        Returns a list[dict] including all the input information what the model need.
        Args:
            examples (list): the data examples, returned by _read_file.
            phase (str): the processing phase, can be "train" "dev" "test" or "predict".
        Returns:
            a list with all the examples record.
        """
        records = []
        with tqdm(total=len(examples)) as process_bar:
            for example in examples:
                record = self.tokenizer.encode(text=example.text_a,
                                               text_pair=example.text_b,
                                               max_seq_len=self.max_seq_len)

                # CustomTokenizer tokenizes the text first and then looks up each word in the vocab.
                # If none of the words are found in the vocab, the text is dropped.
                if not record:
                    logger.info(
                        "The text %s has been dropped as it has no words in the vocab after tokenization."
                        % example.text_a)
                    continue

                if example.label:
                    record["label"] = [int(label) for label in example.label]
                records.append(record)
                process_bar.update(1)
        return records
Example 10
    def __init__(self,
                 dataset,
                 vocab_path,
                 label_map_config=None,
                 max_seq_len=512,
                 do_lower_case=True,
                 random_seed=None):
        self.max_seq_len = max_seq_len
        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_path, do_lower_case=do_lower_case)
        self.vocab = self.tokenizer.vocab
        self.dataset = dataset
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.in_tokens = False

        np.random.seed(random_seed)

        # generate label map
        self.label_map = {}
        for index, label in enumerate(self.dataset.get_labels()):
            self.label_map[label] = index
        logger.info("Dataset label map = {}".format(self.label_map))

        self.current_example = 0
        self.current_epoch = 0

        self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
Example 11
def predict_by_model_path(args, model_path, schema_labels, predict_data,
                          predict_sents, id):
    seq_label_task, reader = get_task(args, schema_labels, predict_data,
                                      predict_sents, id)
    seq_label_task.init_if_necessary()
    seq_label_task.load_parameters(model_path)
    logger.info("PaddleHub has loaded model from %s" % model_path)
    if args.do_predict:
        print("start predict process")
        id2label = {val: key for key, val in reader.label_map.items()}
        input_data = [[d] for d in predict_data]
        run_states = seq_label_task.predict(data=input_data[1:])
        results = []
        for batch_states in run_states:
            batch_results = batch_states.run_results
            batch_infers = batch_results[0].reshape([-1]).astype(
                np.int32).tolist()
            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
            current_id = 0
            for length in seq_lens:
                seq_infers = batch_infers[current_id:current_id + length]
                seq_result = list(map(id2label.get, seq_infers[1:-1]))
                current_id += length if args.add_crf else args.max_seq_len
                results.append(seq_result)

        ret = []
        for sent, r_label in zip(predict_sents, results):
            sent["labels"] = r_label
            ret.append(json.dumps(sent, ensure_ascii=False))
        write_by_lines(
            "{}.{}.{}.pred".format(output_predict_data_path, args.do_model,
                                   id), ret)
Example 12
    def _extractor(self, args, exe, place):
        extractor_scope = fluid.Scope()
        with fluid.scope_guard(extractor_scope):
            extractor_startup_prog = fluid.Program()
            extractor_main_prog = fluid.Program()
            with fluid.program_guard(extractor_main_prog,
                                     extractor_startup_prog):
                extractor_config = parse_config(args.extractor_config)
                extractor_infer_config = merge_configs(extractor_config,
                                                       'infer', vars(args))

                # build model
                extractor_model = models.get_model(
                    "TSN", extractor_infer_config, mode='infer')
                extractor_model.build_input(use_dataloader=False)
                extractor_model.build_model()
                extractor_feeds = extractor_model.feeds()
                extractor_fetch_list = extractor_model.fetches()

                exe.run(extractor_startup_prog)

                logger.info('load extractor weights from {}'.format(
                    args.extractor_weights))
                extractor_model.load_test_weights(exe, args.extractor_weights,
                                                  extractor_main_prog)

                extractor_feeder = fluid.DataFeeder(
                    place=place, feed_list=extractor_feeds)
        return extractor_main_prog, extractor_fetch_list, extractor_feeder, extractor_scope
Example 13
    def __load_word_dict(self, word_dict_path):
        """Load the word topic parameters.
        """
        logger.info("Loading word topic.")
        with open(word_dict_path, 'r', encoding='utf-8') as f:
            for line in tqdm(f.readlines()):
                fields = line.strip().split(" ")
                assert len(fields) > 0, "Model file format error!"
                term_id = int(fields[0])
                assert 0 <= term_id < self.vocab_size(), "Term id out of range!"
                for i in range(1, len(fields)):
                    topic_count = fields[i].split(":")
                    assert len(topic_count) == 2, "Topic count format error!"

                    topic_id = int(topic_count[0])
                    assert 0 <= topic_id < self.__num_topics, "Topic out of range!"

                    count = int(topic_count[1])
                    assert count >= 0, "Topic count error!"

                    self.__word_topic[term_id][topic_id] = count
                    self.__topic_sum[topic_id] += count
                    self.__topic_words[topic_id].append(
                        WordCount(term_id, count))
                new_dict = OrderedDict()
                for key in sorted(self.__word_topic[term_id]):
                    new_dict[key] = self.__word_topic[term_id][key]
                self.__word_topic[term_id] = new_dict
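The file format the loader expects, inferred from the parsing above, is one term per line: a term id followed by space-separated topic_id:count pairs. A small, self-contained demo (the content is made up):

sample_line = "42 3:17 96:5 110:1"
fields = sample_line.strip().split(" ")
term_id = int(fields[0])
topic_counts = dict(
    (int(t), int(c)) for t, c in (f.split(":") for f in fields[1:]))
print(term_id, topic_counts)  # 42 {3: 17, 96: 5, 110: 1}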
Example 14
    def export(self,
               params_path,
               module_name,
               author,
               version="1.0.0",
               summary="",
               author_email="",
               export_path="."):
        """
        export the model saved in the params_path to a hub module.

        Args:
            params_path(str): the model params save path.
            module_name(str): the module name.
            author(str): the author name.
            version(str): the version information.
            summary(str): the module brief introduction.
            author_email(str): the author email address.
            export_path(str): the module export path.
        """
        if not os.path.exists(params_path):
            raise FileNotFoundError("The path %s does not exist." %
                                    params_path)
        export_module_path = os.path.join(export_path, module_name)
        if not os.path.exists(export_module_path):
            os.makedirs(export_module_path)
        logger.info("Begin export the model save in %s ..." % params_path)

        assets_path = os.path.join(self.directory, "template", "assets")
        model_path = os.path.join(self.directory, "template", "model")
        init_path = os.path.join(self.directory, "template", "__init__.py")
        module_temp_path = os.path.join(self.directory, "template",
                                        "module.temp")

        export_assets_path = os.path.join(export_module_path, "assets")
        export_params_path = os.path.join(export_module_path, "assets",
                                          "ernie_gen.pdparams")
        export_init_path = os.path.join(export_module_path, "__init__.py")
        export_model_path = os.path.join(export_module_path, "model")

        shutil.copyfile(init_path, export_init_path)
        shutil.copytree(assets_path, export_assets_path)
        shutil.copyfile(params_path, export_params_path)
        shutil.copytree(model_path, export_model_path)

        module_path = os.path.join(export_module_path, "module.py")
        with open(module_temp_path, encoding="utf8") as ftemp, \
                open(module_path, "w") as fmodule:
            content = ftemp.read()
            # Fill in the placeholders in module.temp.
            for key, value in {
                    "{module_name}": module_name,
                    "{author}": author,
                    "{version}": version,
                    "{summary}": summary,
                    "{author_email}": author_email
            }.items():
                content = content.replace(key, value)
            fmodule.write(content)

        logger.info("The module has exported to %s" %
                    os.path.abspath(export_module_path))
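A hypothetical usage sketch, assuming module is an instance of the class above; all paths and names are made up:

module.export(
    params_path="./ernie_gen_couplet.pdparams",
    module_name="my_ernie_gen",
    author="example_author",
    version="1.0.0",
    summary="A finetuned ernie_gen module",
    author_email="author@example.com",
    export_path="./exported")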
Example 15
    def save_inference_model(self,
                             dirname,
                             model_filename=None,
                             params_filename=None,
                             combined=True):
        detector_dir = os.path.join(dirname, 'text_detector')
        classifier_dir = os.path.join(dirname, 'angle_classifier')
        recognizer_dir = os.path.join(dirname, 'text_recognizer')
        self._save_detector_model(detector_dir, model_filename,
                                  params_filename, combined)
        self._save_classifier_model(classifier_dir, model_filename,
                                    params_filename, combined)
        self._save_recognizer_model(recognizer_dir, model_filename,
                                    params_filename, combined)
        logger.info("The inference model has been saved in the path {}".format(
            os.path.realpath(dirname)))
Example 16
    def _build_net(self):
        if self.network:
            self.seq_len_1 = fluid.layers.data(name="seq_len",
                                               shape=[1],
                                               dtype='int64',
                                               lod_level=0)
            self.seq_len_1_used = fluid.layers.squeeze(self.seq_len_1,
                                                       axes=[1])

            self.seq_len_2 = fluid.layers.data(name="seq_len_2",
                                               shape=[1],
                                               dtype='int64',
                                               lod_level=0)
            self.seq_len_2_used = fluid.layers.squeeze(self.seq_len_2,
                                                       axes=[1])

            # unpad the token_feature
            query_unpad = fluid.layers.sequence_unpad(
                self.query_feature, length=self.seq_len_1_used)
            title_unpad = fluid.layers.sequence_unpad(
                self.title_feature, length=self.seq_len_2_used)

            # add pre-defined net
            net_func = getattr(net.matching, self.network)
            if self.is_train_phase or self.is_predict_phase:
                logger.info(
                    "%s has been added to the PointwiseTextMatchingTask!" %
                    self.network)

            query_feats, title_feats = net_func(query_unpad, title_unpad)
            title_concat = fluid.layers.concat(
                input=[query_feats, title_feats], axis=1)
        else:
            query_feats = fluid.layers.dropout(
                x=self.query_feature,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
            title_feats = fluid.layers.dropout(
                x=self.title_feature,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
            title_concat = fluid.layers.concat(
                input=[query_feats, title_feats], axis=-1)

        score = fluid.layers.fc(
            input=title_concat,
            size=2,
            param_attr=fluid.ParamAttr(
                name="matching_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02),
            ),
            bias_attr=fluid.ParamAttr(
                name="matching_out_b",
                initializer=fluid.initializer.Constant(0.),
            ),
            act="softmax")

        return [score]
Example 17
    def _evaluate(self, model, data_loader, tokenizer, rouge1, rouge2, attn_id, max_decode_len, max_encode_len,
                  beam_width, length_penalty):
        paddle.disable_static()
        model.eval()

        vocab = tokenizer.vocab
        eos_id = vocab[tokenizer.sep_token]
        sos_id = vocab[tokenizer.cls_token]
        pad_id = vocab[tokenizer.pad_token]
        unk_id = vocab[tokenizer.unk_token]
        vocab_size = len(vocab)
        evaluated_sentences_ids = []
        reference_sentences_ids = []
        logger.info("Evaluating...")
        for data in data_loader:
            (src_ids, src_tids, src_pids, _, _, _, _, _, _, _, _, raw_tgt_labels) = data  # the target is never used at inference time
            # Use greedy_search_infilling or beam_search_infilling to get predictions
            output_ids = beam_search_infilling(model,
                                               src_ids,
                                               src_tids,
                                               eos_id=eos_id,
                                               sos_id=sos_id,
                                               attn_id=attn_id,
                                               pad_id=pad_id,
                                               unk_id=unk_id,
                                               vocab_size=vocab_size,
                                               max_decode_len=max_decode_len,
                                               max_encode_len=max_encode_len,
                                               beam_width=beam_width,
                                               length_penalty=length_penalty,
                                               tgt_type_id=1)

            for ids in output_ids.tolist():
                if eos_id in ids:
                    ids = ids[:ids.index(eos_id)]
                evaluated_sentences_ids.append(ids)

            for ids in raw_tgt_labels.numpy().tolist():
                ids = ids[:ids.index(eos_id)]
                reference_sentences_ids.append(ids)

        score1 = rouge1.score(evaluated_sentences_ids, reference_sentences_ids)
        score2 = rouge2.score(evaluated_sentences_ids, reference_sentences_ids)

        logger.info("Rouge-1: %.5f ,Rouge-2: %.5f" % (score1 * 100, score2 * 100))

        evaluated_sentences = []
        reference_sentences = []
        for ids in reference_sentences_ids[:3]:
            reference_sentences.append(''.join(map(post_process, vocab.to_tokens(ids))))
        for ids in evaluated_sentences_ids[:3]:
            evaluated_sentences.append(''.join(map(post_process, vocab.to_tokens(ids))))
        logger.debug(reference_sentences)
        logger.debug(evaluated_sentences)

        model.train()
Example 18
    def classify_pose_in_euler_angles(self, img):
        self.img_size = img.shape

        success, face_landmark = self.get_face_landmark(img, False)

        if not success:
            logger.info(
                "Get face landmark localization failed! Please check your image!"
            )
            return None

        image_points = self.get_image_points_from_landmark(face_landmark)
        success, rotation_vector, translation_vector, camera_matrix, dist_coeffs, reprojectdst = self.caculate_pose_vector(
            image_points)

        if not success:
            logger.info("Get rotation and translation vectors failed!")
            return None

        # Draw the projected cube
        alpha = 0.3
        if not hasattr(self, 'before'):
            self.before = reprojectdst
        else:
            reprojectdst = alpha * self.before + (1 - alpha) * reprojectdst
        # cv2.line expects integer pixel coordinates
        reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2).astype(int)))
        for start, end in self.line_pairs:
            cv2.line(img, reprojectdst[start], reprojectdst[end], (0, 0, 255))

        # Compute the head pose Euler angles
        pitch, yaw, roll = self.caculate_euler_angle(rotation_vector,
                                                     translation_vector)
        cv2.putText(img,
                    "pitch: " + "{:7.2f}".format(pitch),
                    (20, int(self.img_size[0] / 2 - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 0, 255),
                    thickness=2)
        cv2.putText(img,
                    "yaw: " + "{:7.2f}".format(yaw),
                    (20, int(self.img_size[0] / 2 + 30)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 0, 255),
                    thickness=2)
        cv2.putText(img,
                    "roll: " + "{:7.2f}".format(roll),
                    (20, int(self.img_size[0] / 2 + 70)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 0, 255),
                    thickness=2)

        return img
Example 19
    def __init__(self, version_2_with_negative=False):
        self.dataset_dir = os.path.join(DATA_HOME, "squad_data")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))
        self.version_2_with_negative = version_2_with_negative
        self._load_train_examples(version_2_with_negative, if_has_answer=True)
        self._load_dev_examples(version_2_with_negative, if_has_answer=True)
Example 20
    def delete_hook(self, hook_type, name):
        """
        delete the handler function of spectific event.

        Args:
            hook_type (str): the spectific event name
            name (str): the handler function name
        """
        self._hooks.delete(hook_type, name)
        logger.info("Delete hook %s:%s successfully" % (hook_type, name))
    def __init__(self,dataset_dir):
        self.dataset_dir = dataset_dir
        if not os.path.exists(self.dataset_dir):
            logger.info("Dataset not exists.".format(self.dataset_dir))
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
Example 22
    def load_model(self, word_topic_path, vocab_path):
        # Load the vocabulary.
        self.__vocab.load(vocab_path)

        self.__beta_sum = self.__beta * self.__vocab.size()
        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # list of dicts
        self.__load_word_dict(word_topic_path)
        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
Example 23
    def read_images(self, paths=[]):
        images = []
        for img_path in paths:
            assert os.path.isfile(
                img_path), "{} is not a valid file.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                logger.info("error in loading image: {}".format(img_path))
                continue
            images.append(img)
        return images
Example 24
    def _build_net(self):
        if not isinstance(self._base_data_reader, LACClassifyReader):
            # LACClassifyReader won't return the sequence length, while Dataset with tokenizer and ClassifyReader will.
            self.seq_len = fluid.layers.data(name="seq_len",
                                             shape=[1],
                                             dtype='int64',
                                             lod_level=0)
            self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
            # unpad the token_feature
            unpad_feature = fluid.layers.sequence_unpad(
                self.feature, length=self.seq_len_used)
        if self.network:
            # add pre-defined net
            net_func = getattr(net.classification, self.network)
            if self.network == 'dpcnn':
                # the dpcnn network does not need unpadding
                cls_feats = net_func(self.feature,
                                     emb_dim=self.feature.shape[-1])
            else:
                if self._compatible_mode and isinstance(
                        self._base_data_reader, LACClassifyReader):
                    cls_feats = net_func(self.feature)
                else:
                    cls_feats = net_func(unpad_feature)
            if self.is_train_phase or self.is_predict_phase:
                logger.info("%s has been added in the TextClassifierTask!" %
                            self.network)
        else:
            # no pre-defined net; use an fc net instead
            cls_feats = fluid.layers.dropout(
                x=self.feature,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")

        if self.hidden_units is not None:
            for n_hidden in self.hidden_units:
                cls_feats = fluid.layers.fc(input=cls_feats,
                                            size=n_hidden,
                                            act="relu")

        logits = fluid.layers.fc(
            input=cls_feats,
            size=self.num_classes,
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
            act="softmax")

        self.ret_infers = fluid.layers.reshape(x=fluid.layers.argmax(logits,
                                                                     axis=1),
                                               shape=[-1, 1])

        return [logits]
Example 25
    def __init__(self):
        self.dataset_dir = os.path.join(DATA_HOME, "msra_ner")
        if not os.path.exists(self.dataset_dir):
            ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
                url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
        else:
            logger.info("Dataset {} already cached.".format(self.dataset_dir))

        self._load_train_examples()
        self._load_test_examples()
        self._load_dev_examples()
Example 26
    def modify_hook(self, hook_type, name, func):
        """
         modify the handler function of spectific event.

         Args:
             hook_type (str): the spectific event name
             name (str): the handler function name
             func (func): the new handler function
         """
        self._hooks.modify(hook_type, name, func)
        logger.info("Modify hook %s:%s successfully" % (hook_type, name))
Example 27
    def save_checkpoint(self):
        model_saved_dir = os.path.join(self.config.checkpoint_dir,
                                       "step_%d" % self.current_step)
        logger.info("Saving model checkpoint to {}".format(model_saved_dir))
        self.save_inference_model(dirname=model_saved_dir)
        save_checkpoint(checkpoint_dir=self.config.checkpoint_dir,
                        current_epoch=self.current_epoch,
                        global_step=self.current_step,
                        best_score=self.best_score,
                        exe=self.exe,
                        main_program=self.main_program)
Example 28
    def _log_interval_event(self, run_states):
        avg_loss, auc_list, run_speed = self._calculate_metrics(run_states)

        self.env.loss_scalar.add_record(self.current_step, avg_loss)
        avg_auc = np.mean(auc_list)
        self.env.avg_auc_scalar.add_record(self.current_step, avg_auc)
        logger.info("step %d: loss=%.5f avg_auc=%.5f [step/sec: %.2f]" %
                    (self.current_step, avg_loss, avg_auc, run_speed))
        for index, auc_scalar in enumerate(self.env.auc_scalar_list):
            auc_scalar.add_record(self.current_step, auc_list[index][0])
            logger.info("label_%d_auc = %.5f" % (index, auc_list[index][0]))
Example 29
    def _download_dataset(self, dataset_path, url):
        if not os.path.exists(dataset_path):
            result, tips, dataset_path = default_downloader.download_file_and_uncompress(
                url=url,
                save_path=hub.common.dir.DATA_HOME,
                print_progress=True,
                replace=True)
            if not result:
                raise Exception(tips)
        else:
            logger.info("Dataset {} already cached.".format(dataset_path))
        return dataset_path
Example 30
    def _init_with_name(self, name, version=None):
        log_msg = "Installing %s module" % name
        if version:
            log_msg += "-%s" % version
        logger.info(log_msg)
        result, tips, module_dir = default_module_manager.install_module(
            module_name=name, module_version=version)
        if not result:
            logger.error(tips)
            exit(1)
        logger.info(tips)
        self._init_with_module_file(module_dir[0])