def __init__(self, zh_model, text=""):
    """
    Tokenize and POS-tag Chinese text, then extract keyphrase candidates.

    :param zh_model: pipeline whose ``cut(text)`` yields (word, pos) pairs
        (Chinese tokenizer + POS tagger).
    :param text: raw input text to process.
    """
    # POS tags considered meaningful for keyphrase extraction.
    self.considered_tags = {
        'n', 'np', 'ns', 'ni', 'nz', 'a', 'd', 'i', 'j', 'x', 'g'
    }

    # Consume the tokenizer output exactly once, then derive the plain
    # token list from it. (The original iterated ``zh_model.cut(text)``
    # twice via a shadowed variable, which yields an empty second list —
    # and a failing assert — whenever ``cut`` returns a generator.)
    self.tokens_tagged = [(word, pos) for word, pos in zh_model.cut(text)]
    self.tokens = [word for word, _pos in self.tokens_tagged]
    assert len(self.tokens) == len(self.tokens_tagged)

    for i, token in enumerate(self.tokens):
        # Re-tag stopwords as "u" and hyphens as "-" so the candidate
        # extractor treats them as phrase breakers.
        if token.lower() in stopword_dict:
            self.tokens_tagged[i] = (token, "u")
        if token == '-':
            self.tokens_tagged[i] = (token, "-")
    self.keyphrase_candidate = extractor.extract_candidates(
        self.tokens_tagged, zh_model)
    def __init__(self, preprocess_model, text=""):
        """
        Tokenize and POS-tag (Vietnamese) text, then extract keyphrase
        candidates.

        :param preprocess_model: pipeline whose ``pos_tag(text)`` returns a
            list of sentences, each a list of (word, tag) pairs.
        :param text: raw input text to process.
        """
        # Connector tokens ("&" and Vietnamese "và", i.e. "and") that may
        # join two noun phrases into one candidate.
        punctuation_connect = ["&", "và"]

        sentences = preprocess_model.pos_tag(text)
        # Flatten the per-sentence (word, tag) pairs into flat sequences.
        self.tokens_tagged = [(word, tag)
                              for sent in sentences for word, tag in sent]
        self.tokens = [word for word, _tag in self.tokens_tagged]
        assert len(self.tokens) == len(self.tokens_tagged)

        for i, token in enumerate(self.tokens):
            # Re-tag stopwords as "IN" and connectors as "PC" so the
            # candidate extractor handles them specially.
            if token.lower() in stopwords:
                self.tokens_tagged[i] = (token, "IN")
            if token.lower() in punctuation_connect:
                self.tokens_tagged[i] = (token, "PC")
        # Extract the noun-phrase (NP) candidates.
        self.keyphrase_candidate = extractor.extract_candidates(
            self.tokens_tagged)
# Example #3
# 0
    def __init__(self, en_model, text=""):
        """
        Tokenize and POS-tag English text, then extract keyphrase candidates.

        :param en_model: pipeline providing ``word_tokenize(text)`` and
            ``pos_tag(text)`` (tokenizer + POS tagger).
        :param text: raw input text to process.
        """
        # POS tags considered meaningful for keyphrase extraction
        # (Penn Treebank nouns and adjectives).
        self.considered_tags = {'NN', 'NNS', 'NNP', 'NNPS', 'JJ'}

        # Assign directly; the original pre-initialized both attributes to
        # empty lists only to overwrite them immediately.
        self.tokens = en_model.word_tokenize(text)
        self.tokens_tagged = en_model.pos_tag(text)
        assert len(self.tokens) == len(self.tokens_tagged)

        for i, token in enumerate(self.tokens):
            # Re-tag stopwords as "IN" so they break candidate phrases.
            if token.lower() in stopword_dict:
                self.tokens_tagged[i] = (token, "IN")
        self.keyphrase_candidate = extractor.extract_candidates(
            self.tokens_tagged, en_model)


# if __name__ == '__main__':
#     text = "Adaptive state feedback control for a class of linear systems with unknown bounds of uncertainties The problem of adaptive robust stabilization for a class of linear time-varying systems with disturbance and nonlinear uncertainties is considered. The bounds of the disturbance and uncertainties are assumed to be unknown, being even arbitrary. For such uncertain dynamical systems, the adaptive robust state feedback controller is obtained. And the resulting closed-loop systems are asymptotically stable in theory. Moreover, an adaptive robust state feedback control scheme is given. The scheme ensures the closed-loop systems exponentially practically stable and can be used in practical engineering. Finally, simulations show that the control scheme is effective"
#     ito = InputTextObj(en_model, text)
#     print("OK")