Esempio n. 1
0
    def __proportional_function(self, doc, token, new_topic):
        """Return the unnormalized weight of assigning ``new_topic`` to ``token``.

        Two argument combinations are supported:

        * ``LDADoc`` + ``Token``: the weight is
          ``(doc.topic_sum + alpha) * (word_topic_value + beta) /
          (topic_sum_value + beta_sum)``.
        * ``SLDADoc`` + ``Sentence``: the document term
          ``doc.topic_sum + alpha`` is multiplied by the word ratio
          ``(word_topic_value + beta) / (topic_sum_value + beta_sum)`` of
          every word id in ``token.tokens``.

        When ``new_topic`` equals the topic already stored on ``token`` the
        terms are decremented by one (presumably to leave the item's own
        current assignment out of the counts).

        Args:
            doc: ``LDADoc`` or ``SLDADoc`` the item belongs to.
            token: ``Token`` (with ``LDADoc``) or ``Sentence`` (with ``SLDADoc``).
            new_topic: candidate topic id.

        Returns:
            The computed weight, or ``None`` after logging an error when the
            argument types match neither combination.
        """
        if isinstance(doc, LDADoc) and isinstance(token, Token):
            current_topic = token.topic
            model = self.__model
            doc_term = doc.topic_sum(new_topic) + model.alpha()
            word_term = model.word_topic_value(token.id, new_topic) + model.beta()
            norm_term = model.topic_sum_value(new_topic) + model.beta_sum()
            if new_topic == current_topic and word_term > 1:
                # The document term is only reduced while it stays above one.
                if doc_term > 1:
                    doc_term -= 1
                word_term -= 1
                norm_term -= 1
            return doc_term * word_term / norm_term

        if isinstance(doc, SLDADoc) and isinstance(token, Sentence):
            keeps_topic = new_topic == token.topic
            model = self.__model
            weight = doc.topic_sum(new_topic) + model.alpha()
            if keeps_topic:
                weight -= 1
            for term_id in token.tokens:
                word_term = model.word_topic_value(term_id, new_topic) + model.beta()
                norm_term = model.topic_sum_value(new_topic) + model.beta_sum()
                if keeps_topic and word_term > 1:
                    word_term -= 1
                    norm_term -= 1
                weight *= word_term / norm_term
            return weight

        logger.error("Wrong input argument type!")
Esempio n. 2
0
 def infer(self, input, doc):
     """Run topic inference on tokenized input and store the result in ``doc``.

     Args:
         input: for an ``LDADoc``, an iterable of token strings; for an
             ``SLDADoc``, an iterable of sentences, each of which is iterated
             to obtain its tokens.
         doc: ``LDADoc`` or ``SLDADoc`` instance to fill. Any other type is
             reported through the logger and nothing else happens.
     """
     fix_random_seed()

     sentence_level = isinstance(doc, SLDADoc)
     if not sentence_level and not isinstance(doc, LDADoc):
         logger.error("Wrong Doc Type!")
         return

     # Both document kinds start from the same initial state.
     doc.init(self.__model.num_topics())
     doc.set_alpha(self.__model.alpha())

     if sentence_level:
         for sent in input:
             # Out-of-vocabulary words are dropped; the sentence itself is
             # always added, even when no word survives.
             term_ids = [
                 term_id
                 for term_id in (self.__model.term_id(word) for word in sent)
                 if term_id != OOV
             ]
             doc.add_sentence(
                 Sentence(rand_k(self.__model.num_topics()), term_ids))
         self.slda_infer(doc, 20, 50)
         return

     for word in input:
         term_id = self.__model.term_id(word)
         if term_id != OOV:
             doc.add_token(Token(rand_k(self.__model.num_topics()), term_id))
     self.lda_infer(doc, 20, 50)
Esempio n. 3
0
    def check_server(self):
        """Probe every configured server and record the usable serving endpoints.

        For each ``ip:port`` entry of ``self.server_list`` a TCP connection is
        opened, ``b'pending server'`` is sent and up to 1024 bytes of reply are
        read. The reply is split on tabs and the text after the first ``:`` of
        each field is used: field 0 is an integer status code, field 1 the
        model name (or, for a non-zero status, the raw error text) and field 2
        the serving port.

        When the status is ``0`` and the model name equals ``self.model_name``,
        ``"<server ip>:<serving port>"`` is appended to ``self.serving_list``;
        otherwise an error is logged. Connection and parsing errors are not
        caught and propagate to the caller.
        """
        for server in self.server_list:
            address_parts = server.split(':')
            server_ip = address_parts[0]
            server_port = int(address_parts[1])

            client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                client.connect((server_ip, server_port))
                client.send(b'pending server')
                response = client.recv(1024).decode()
            finally:
                # Release the descriptor even when connect/send/recv raises;
                # previously one socket was leaked per probed server.
                client.close()

            response_list = response.split('\t')
            status_code = int(response_list[0].split(':')[1])

            if status_code != 0:
                error_msg = response_list[1]
                logger.error('connect server {} failed. {}'.format(
                    server, error_msg))
                continue

            server_model = response_list[1].split(':')[1]
            if server_model != self.model_name:
                logger.error(
                    'model_name not match, server {}  using : {} '.format(
                        server, server_model))
                continue

            serving_port = response_list[2].split(':')[1]
            self.serving_list.append(server_ip + ':' + serving_port)
Esempio n. 4
0
    def get_face_landmark(self, image):
        """
        Predict the coordinates of the 68 facial keypoints and render them.

        The keypoints are drawn as filled circles on a copy of ``image``, the
        annotated copy is written to ``face_landmark.jpg`` and read back.

        image(ndarray): pixel data of a single image.

        Returns a 3-tuple ``(True, keypoints, rendered_image)`` on success and
        ``(False, None, None)`` after logging the error when anything fails.
        """
        try:
            # To run on GPU pass use_gpu=True and set the CUDA_VISIBLE_DEVICES
            # environment variable before running the tutorial code.
            res = self.module.keypoint_detection(images=[image], use_gpu=False)

            # Draw on a copy so the caller's image is left untouched.
            annotated = image.copy()
            for point in res[0]['data'][0]:
                center = (int(point[0]), int(point[1]))
                cv2.circle(annotated, center, 2, (0, 0, 255), -1)

            res_img_path = 'face_landmark.jpg'
            cv2.imwrite(res_img_path, annotated)

            # The returned image is the one read back from disk.
            rendered = mpimg.imread(res_img_path)
            return True, res[0]['data'][0], rendered
        except Exception as e:
            logger.error("Get face landmark localization failed! Exception: %s " % e)
            return False, None, None
Esempio n. 5
0
    def encode(self, text):
        """Send ``text`` to a serving endpoint and collect the returned values.

        The request built by ``self.prepare_data`` is sent with
        ``self.request_server``; while that call answers ``'retry'`` it is
        repeated up to ``self.retry`` additional times.

        Args:
            text (list): samples to encode.

        Returns:
            list: the ``"values"`` entry of every sample of every instance in
            the response, in response order. ``-1`` is returned when
            ``self.serving_list`` is empty or when the request ends with a
            string sentinel (retry budget exhausted, or any other string such
            as ``'fail'``) instead of a decoded response.

        Raises:
            TypeError: if ``text`` is not a list.
        """
        if not self.serving_list:
            logger.error('No match server.')
            return -1
        if not isinstance(text, list):
            raise TypeError('Only support list')
        request_msg = self.prepare_data(text)

        response_msg = self.request_server(request_msg)
        retry = 0
        while isinstance(response_msg, str) and response_msg == 'retry':
            if retry >= self.retry:
                logger.error('Request failed after {} times retry'.format(
                    self.retry))
                break
            retry += 1
            logger.info('Try to connect another servers')
            response_msg = self.request_server(request_msg)

        if isinstance(response_msg, str):
            # A string is a failure sentinel, not a payload. Indexing it used
            # to crash with "string indices must be integers"; report the
            # failure the same way as the no-server case instead.
            return -1

        return [
            sample["values"]
            for msg in response_msg["instances"]
            for sample in msg["instances"]
        ]
Esempio n. 6
0
 def _init_with_url(self, url):
     """Download the archive at ``url`` and initialise from the extracted module.

     ``url`` is validated with ``utils.check_url`` and then downloaded and
     uncompressed into the current directory. On success the resulting
     module directory is handed to ``self._init_with_module_file``.

     Args:
         url: location of the module archive.

     Raises:
         SystemExit: with status 1, after logging the downloader's message,
             when the download or extraction reports failure.
     """
     utils.check_url(url)
     result, tips, module_dir = default_downloader.download_file_and_uncompress(
         url, save_path=".")
     if not result:
         logger.error(tips)
         # The builtin-looking ``exit`` is injected by the ``site`` module for
         # interactive use and is absent under ``python -S``; raising
         # SystemExit gives the same exit status without that dependency.
         raise SystemExit(1)
     self._init_with_module_file(module_dir)
Esempio n. 7
0
 def check_requirements(self):
     """Terminate the process when ``shapely`` or ``pyclipper`` is unusable.

     Both packages are imported only to verify that they load.

     Raises:
         SystemExit: with status 1, after logging an error, when either
             import fails.
     """
     try:
         import shapely  # noqa: F401
         import pyclipper  # noqa: F401
     except Exception:
         # ``Exception`` rather than a bare ``except`` so Ctrl-C is not
         # swallowed, and rather than ``ImportError`` so that a package that
         # is installed but fails to load for another reason still gets the
         # friendly message.
         logger.error(
             'This module requires the shapely, pyclipper tools. The running enviroment does not meet the requirments. Please install the two packages.'
         )
         # ``exit()`` without an argument ended the process with status 0,
         # which reported this failure as success.
         raise SystemExit(1)
Esempio n. 8
0
 def get_face_landmark(self, image, use_gpu=False):
     """Run keypoint detection on one image.

     Args:
         image: the image, passed to the module as a one-element batch.
         use_gpu: forwarded unchanged to ``keypoint_detection``.

     Returns:
         ``(True, landmarks)`` where ``landmarks`` is ``res[0]['data'][0]``
         of the detection result, or ``(False, None)`` after logging the
         error when detection or result lookup raises.
     """
     try:
         detections = self.module.keypoint_detection(
             images=[image], use_gpu=use_gpu)
         landmarks = detections[0]['data'][0]
     except Exception as e:
         logger.error(
             "Get face landmark localization failed! Exception: %s " % e)
         return False, None
     else:
         return True, landmarks
Esempio n. 9
0
 def _init_with_name(self, name, version=None):
     """Install the named module and initialise from its first directory entry.

     Args:
         name: module name passed to the module manager.
         version: optional module version; ``None`` lets the manager decide.

     Raises:
         SystemExit: with status 1, after logging the manager's message,
             when installation reports failure.
     """
     log_msg = "Installing %s module" % name
     if version:
         log_msg += "-%s" % version
     logger.info(log_msg)
     result, tips, module_dir = default_module_manager.install_module(
         module_name=name, module_version=version)
     if not result:
         logger.error(tips)
         # ``exit`` is an interactive helper provided by ``site`` and may be
         # missing (``python -S``); SystemExit yields the same exit status.
         raise SystemExit(1)
     logger.info(tips)
     self._init_with_module_file(module_dir[0])
 def get_face_landmark(self, image):
     """
     Predict the coordinates of the 68 facial keypoints.

     image(ndarray): pixel data of a single image.

     Returns ``(True, keypoints)`` on success and ``(False, None)`` after
     logging the error when detection fails.
     """
     try:
         prediction = self.module.keypoint_detection(images=[image])
         keypoints = prediction[0]['data'][0]
     except Exception as e:
         logger.error(
             "Get face landmark localization failed! Exception: %s " % e)
         return False, None
     return True, keypoints
 def get_face_landmark(self, image):
     """
     Predict the coordinates of the 68 facial keypoints on the CPU.

     image(ndarray): pixel data of a single image.

     Returns ``(True, keypoints)`` on success and ``(False, None)`` after
     logging the error when detection fails.
     """
     succeeded, keypoints = False, None
     try:
         # To run on GPU pass use_gpu=True and set the CUDA_VISIBLE_DEVICES
         # environment variable before running the tutorial code.
         outputs = self.module.keypoint_detection(
             images=[image], use_gpu=False)
         succeeded, keypoints = True, outputs[0]['data'][0]
     except Exception as e:
         logger.error(
             "Get face landmark localization failed! Exception: %s " % e)
     return succeeded, keypoints
Esempio n. 12
0
 def _init_with_name(self, name, version=None):
     """Install the named module and initialise from its first directory entry.

     Args:
         name: module name passed to the module manager.
         version: optional module version; appended to the log line when set.

     Raises:
         RuntimeError: carrying the manager's message when installation
             reports failure.
     """
     log_msg = "Installing %s module" % name
     version_suffix = "-%s" % version if version else ""
     logger.info(log_msg + version_suffix)

     result, tips, module_dir = default_module_manager.install_module(
         module_name=name,
         module_version=version,
         extra={"command": "install"})
     if not result:
         logger.error(tips)
         raise RuntimeError(tips)

     logger.info(tips)
     self._init_with_module_file(module_dir[0])
Esempio n. 13
0
 def load(self, vocab_file):
     """Rebuild the term/id lookup tables from a tab-separated vocabulary file.

     Every line must split into exactly five tab-separated fields after
     surrounding whitespace is stripped; field 1 is the term and field 2 its
     integer id. A term seen a second time is logged and skipped, so the
     first occurrence wins.

     Args:
         vocab_file: path of the UTF-8 encoded vocabulary file.

     Raises:
         AssertionError: if a line does not have five fields.
     """
     self.__term2id = {}
     self.__id2term = {}
     with open(vocab_file, 'r', encoding='utf-8') as fin:
         for raw_line in fin:
             columns = raw_line.strip().split('\t')
             assert len(columns) == 5, \
                 "Vocabulary file [%s] format error!" % (vocab_file)
             term, term_id = columns[1], int(columns[2])
             if term not in self.__term2id:
                 self.__term2id[term] = term_id
                 self.__id2term[term_id] = term
             else:
                 logger.error("Duplicate word [%s] in vocab file!" % (term))
Esempio n. 14
0
    def init_with_name(cls, name, version=None, **kwargs):
        """Install the named module under an exclusive file lock, then load it.

        A file named ``name`` under ``CACHE_HOME`` is opened in append mode and
        locked with ``lock.LOCK_EX`` for the duration of the installation. The
        lock is released and the file closed before the installed directory is
        loaded, and also when installation fails.

        Args:
            name: module name passed to the module manager.
            version: optional module version; appended to the log line when set.
            **kwargs: forwarded to ``cls.init_with_directory``.

        Returns:
            Whatever ``cls.init_with_directory`` returns for the first
            installed directory.

        Raises:
            RuntimeError: carrying the manager's message when installation
                reports failure.
        """
        with open(os.path.join(CACHE_HOME, name), "a") as fp_lock:
            lock.flock(fp_lock, lock.LOCK_EX)
            try:
                log_msg = "Installing %s module" % name
                if version:
                    log_msg += "-%s" % version
                logger.info(log_msg)
                extra = {"command": "install"}
                result, tips, module_dir = default_module_manager.install_module(
                    module_name=name, module_version=version, extra=extra)
                if not result:
                    logger.error(tips)
                    raise RuntimeError(tips)

                logger.info(tips)
            finally:
                # Previously the failure path raised while still holding the
                # lock and the file object was never closed.
                lock.flock(fp_lock, lock.LOCK_UN)
        return cls.init_with_directory(directory=module_dir[0], **kwargs)
Esempio n. 15
0
    def show_topic_keywords(self, topic_id, k=10):
        """
        Return the first k keywords stored for a topic with their probabilities.

        Args:
            topic_id(int): id of the topic to inspect.
            k(int): maximum number of keywords; capped at the number of words
                    stored for the topic.

        Returns:
            results(dict): maps each keyword to ``count / (topic total + 1e-8)``.
                           ``None`` is returned, after logging an error, when
                           ``topic_id`` is outside ``[0, num_topics)``.
        """
        EPS = 1e-8
        if not (0 <= topic_id < self.config.num_topics):
            logger.error("%d is out of range!" % topic_id)
            return None

        entries = self.topic_words[topic_id]
        limit = min(k, len(entries))
        # EPS keeps the division defined when the topic total is zero.
        return {
            self.vocabulary[entries[rank].word_id]:
                entries[rank].count / (self.topic_sum_table[topic_id] + EPS)
            for rank in range(limit)
        }
Esempio n. 16
0
    def request_server(self, request_msg):
        """Send ``request_msg`` to one endpoint chosen by ``self.load_balance``.

        Supported strategies:

        * ``'round_robin'`` - use ``self.con_index`` and advance it, wrapping
          around ``self.serving_list``, after every attempt.
        * ``'random'`` - pick a random index for every attempt.
        * ``'bind'`` - always use ``int(self.process_id) % len(serving_list)``.

        Args:
            request_msg: request body, passed unchanged to the HTTP POST.

        Returns:
            The decoded JSON response on success; ``'retry'`` when the attempt
            failed and the caller may try again; ``'fail'`` when
            ``self.serving_list`` is empty; ``None`` when ``self.load_balance``
            is not one of the supported strategies.
        """
        strategy = self.load_balance
        if strategy not in ('round_robin', 'random', 'bind'):
            return None

        num_servers = len(self.serving_list)
        if num_servers == 0:
            # Checked up front: inside the exception handler this branch could
            # never run, because the handler indexed the empty list first.
            logger.error('All server failed, process will exit')
            return 'fail'

        try:
            if strategy == 'random':
                random.seed()
                self.con_index = random.randint(0, num_servers - 1)
                logger.info(self.con_index)
            elif strategy == 'bind':
                self.con_index = int(self.process_id) % num_servers
            else:
                # Keep a previously stored index inside the list bounds.
                self.con_index %= num_servers
            response_msg = self._post_inference(
                self.serving_list[self.con_index], request_msg)
        except Exception as err:
            # ``Exception`` instead of ``BaseException`` so KeyboardInterrupt
            # and SystemExit are no longer turned into a silent retry.
            logger.warning("Infer Error with server {} : {}".format(
                self.serving_list[self.con_index], err))
            if strategy == 'round_robin':
                # Wrap around; an unwrapped increment made the next attempt
                # index past the end of the list.
                self.con_index = (self.con_index + 1) % num_servers
            elif strategy == 'random':
                self.con_index = random.randint(0, num_servers - 1)
            else:
                self.con_index = int(self.process_id) % num_servers
            return 'retry'

        if strategy == 'round_robin':
            self.con_index = (self.con_index + 1) % num_servers
        return response_msg

    def _post_inference(self, server, request_msg):
        """POST ``request_msg`` as JSON to ``server`` and return the decoded reply."""
        cur_con = httplib.HTTPConnection(server)
        try:
            cur_con.request('POST', "/BertService/inference", request_msg,
                            {"Content-Type": "application/json"})
            response = cur_con.getresponse()
            return ujson.loads(response.read())
        finally:
            # The connection was previously left open after every request.
            cur_con.close()
Esempio n. 17
0
 def _load_test_examples(self,
                         version_2_with_negative=False,
                         is_training=False):
     """Record that no test file is available.

     Sets ``self.test_file`` to ``None`` and logs an error. Both parameters
     are accepted but not used by this implementation.
     """
     self.test_file = None
     logger.error("not test_file")
Esempio n. 18
0
    def __init__(
            self,
            num_classes,
            dataset=None,
            feed_list=None,  # Deprecated
            data_reader=None,  # Deprecated
            feature=None,
            token_feature=None,
            network=None,
            startup_program=None,
            config=None,
            hidden_units=None,
            metrics_choices="default"):
        """Validate the feature/network combination and set up the task.

        Exactly one of ``feature`` and ``token_feature`` must be given:
        ``token_feature`` together with ``network``, ``feature`` without it.

        Args:
            num_classes: total number of labels of the text classification task.
            dataset: forwarded unchanged to the parent constructor.
            feed_list(list): names of the variables fed to the main program.
                Deprecated in paddlehub v1.8.
            data_reader(object): data reader for the task; must be one of
                ClassifyReader and LACClassifyReader. Deprecated in paddlehub v1.8.
            feature(Variable): sentence-level feature used to classify texts,
                shaped like [-1, emb_size]. Must be None when ``token_feature``
                is set. Default None.
            token_feature(Variable): token-level feature connected to the
                pre-defined network, shaped like [-1, seq_len, emb_size].
                Default None.
            network(str): the pre-defined network. Choices: 'bilstm', 'bow',
                'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. When set,
                ``token_feature`` must be set and ``feature`` must be None.
            startup_program(object): customized startup program. Default None.
            config(RunConfig): run config for the task, such as batch_size,
                epoch and learning_rate. Default None.
            hidden_units(list): sizes of the fully connected layers added to
                the program. Default None.
            metrics_choices(list|str): metrics used for the task. The default
                value "default" is replaced by ["acc"]. Choices: acc,
                precision, recall, f1, matthews.

        Raises:
            SystemExit: with status 1, after logging an error, when both or
                neither of ``feature`` and ``token_feature`` are set.
            AssertionError: when ``network`` is not a supported choice or the
                selected feature does not have the required number of
                dimensions.
        """
        if (not feature) and (not token_feature):
            logger.error(
                'Both token_feature and feature are None, one of them must be set.'
            )
            # ``exit`` is an interactive helper provided by ``site`` and may
            # be missing (``python -S``); SystemExit yields the same status.
            raise SystemExit(1)
        elif feature and token_feature:
            logger.error(
                'Both token_feature and feature are set. One should be set, the other should be None.'
            )
            raise SystemExit(1)

        if network:
            assert network in [
                'bilstm', 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'
            ], 'network (%s) choice must be one of bilstm, bow, cnn, dpcnn, gru, lstm!' % network
            assert token_feature and (
                not feature
            ), 'If you wanna use network, you must set token_feature ranther than feature for TextClassifierTask!'
            assert len(
                token_feature.shape
            ) == 3, 'When you use network, the parameter token_feature must be the token-level feature([batch_size, max_seq_len, embedding_size]), shape as [-1, 128, 200].'
        else:
            assert feature and (
                not token_feature
            ), 'If you do not use network, you must set feature ranther than token_feature for TextClassifierTask!'
            assert len(
                feature.shape
            ) == 2, 'When you do not use network, the parameter feture must be the sentence-level feature ([batch_size, hidden_size]), such as the pooled_output of ERNIE, BERT, RoBERTa and ELECTRA module.'

        self.network = network

        if metrics_choices == "default":
            metrics_choices = ["acc"]

        # The parent receives whichever of the two features was supplied.
        super(TextClassifierTask,
              self).__init__(dataset=dataset,
                             data_reader=data_reader,
                             feature=feature if feature else token_feature,
                             num_classes=num_classes,
                             feed_list=feed_list,
                             startup_program=startup_program,
                             config=config,
                             hidden_units=hidden_units,
                             metrics_choices=metrics_choices)