def ProcessWoeds(self, arr):
    tagged = pos_tag(arr)
    chunkGram = r"""Chunk: {<RB.?>*<VB.?>*<NNP>}"""
    chunkParser = RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)
    return chunked
def Chunk(self, sentence, node='NP', grammer=r"""
          NP: {<DT|PP\$>?<JJ>*<NN>}
              {<NNP>+}
          """):
    '''
    Takes text and returns a list of nouns and noun phrases; this is done by a form of
    RegEx matching which is included in the NLTK library.

    @param text: the text that is going to be chunked
    @param node='NP': which node to chunk
    @param grammer='NP: {<DT|PP\$>?<JJ>*<NN>}{<NNP>+}': the grammar RegEx to use for chunking
    @return: A nested list of tuples of chunked phrases with POS tagging.
    '''
    tmp = []
    cp = RegexpParser(grammer)
    for sent in sentence:
        for phrase in self.sub_leaves(cp.parse(sent), node):
            tmp.append(phrase)
    results = []
    for phrase in tmp:
        string = ""
        for (word, tag) in phrase:
            string = string + word + " "
        results.append(string[:-1])
    return results
def get_chunks(tagged_sentences):
    master_noun = []
    master_adj = []
    grammar = r"""
        CHUNK1: {<NN.*><.*>{0,3}<JJ.*>}   # Any noun, terminated with any adjective
        CHUNK2: {<JJ.*><.*>{0,3}<NN.*>}   # Any adjective, terminated with any noun
    """
    cp = RegexpParser(grammar)
    for sent in tagged_sentences:
        tree = cp.parse(sent)
        for subtree in tree.subtrees(
                filter=lambda t: t.label() in ['CHUNK1', 'CHUNK2']):
            if (str(subtree).find('NN') > 0 or str(subtree).find('NNS') > 0
                    or str(subtree).find('NNP') > 0) and (
                        str(subtree).find('JJ') > 0
                        or str(subtree).find('JJS') > 0
                        or str(subtree).find('JJR') > 0):
                nouns = [word for word, tag in subtree.leaves()
                         if tag in ['NN', 'NNS', 'NNP']]
                adjs = [word for word, tag in subtree.leaves()
                        if tag in ['JJ', 'JJR', 'JJS']]
                master_noun.append(nouns)
                master_adj.append(adjs)
    return [m[0] + ":" + n[0] for m, n in zip(master_noun, master_adj)]
def parse(self):
    """
    Parse the tokenized text with our grammar in order to retrieve the
    groups of words that contain a named entity (NE).
    """
    if self.own_tag:
        rp = RegexpParser(Parser.GRAMMAR_OWN_TAG)
    else:
        rp = RegexpParser(Parser.GRAMMAR)
    tree = rp.parse(self.tokens)
    for subtree in tree.subtrees():
        if subtree.label() == "S":
            continue
        self.tagged_nodes.append([subtree.label(), subtree.leaves()])
    print(self.tagged_nodes)
def parse_request(message):
    tagPatterns = [
        (r'(honda|toyota|ford|kia|hyundai|audi|bmw|opel|mitsubishi|mazda|skoda|subaru)$', 'VENDOR'),
        (r'([a-zA-Z0-9]+)$', 'MODEL'),
        (r'(от|для)$', 'PREP'),
        (r'(нах|бля|твою мать)$', 'PROFANITY'),
        (r'([а-яА-Я]+)$', 'PART_NAME'),
    ]
    tagger = nltk.RegexpTagger(tagPatterns)
    taggedRequest = tagger.tag(nltk.word_tokenize(message))
    chunker = RegexpParser(r'''
        S: {<CAR> <PREP>? <PART_NAME>}
        MODEL: {<MODEL>+}
        VENDOR: {<VENDOR>}
        CAR: {<VENDOR> <MODEL>}
        PROFANITY: {<PROFANITY>+}
        PART_NAME: {<PART_NAME>+}
    ''')
    tree = chunker.parse(taggedRequest)

    parsed_request = {}
    # Hack with try/except
    try:
        parsed_request['vendor'] = list(
            tree.subtrees(lambda t: t.label() == 'VENDOR'))[0].leaves()[0][0]
    except Exception:
        parsed_request['vendor'] = None
    try:
        parsed_request['model'] = ' '.join([
            leaf[0] for leaf in list(
                tree.subtrees(lambda t: t.label() == 'MODEL'))[0].leaves()
        ])
    except Exception:
        parsed_request['model'] = None
    try:
        parsed_request['part_name'] = ' '.join([
            leaf[0] for leaf in list(
                tree.subtrees(lambda t: t.label() == 'PART_NAME'))[0].leaves()
        ])
    except Exception:
        parsed_request['part_name'] = None
    try:
        parsed_request['profanity'] = bool(
            list(tree.subtrees(lambda t: t.label() == 'PROFANITY')))
    except Exception:
        parsed_request['profanity'] = False
    return parsed_request
def __init__(self, patterns: str, loop: int = 1, trace: int = 0,
             attribute: str = 'pos', apply_iob2: bool = True) -> None:
    self.__attribute = attribute
    self.__regex_parser = RegexpParser(patterns, root_label='', loop=loop, trace=trace)
    self.__apply_iob2 = apply_iob2
def generate_chunks(tagged_sent,
                    expression=r'CHUNK: {(<adj>* <n.*>+ <prp>)? <adj>* <n.*>+}'):
    chunks = []
    chunkParser = RegexpParser(expression)
    try:
        if len(tagged_sent) == 0:
            tree = Tree('S', [])
        else:
            tree = chunkParser.parse(tagged_sent, trace=0)
        for subtree in tree.subtrees():
            if subtree.label() == "CHUNK":
                chunks.append(subtree.leaves())
    except ValueError:
        chunks = []
    return chunks
def _chunker(self, tuple_sent):
    """Chunk base phrases using chunking rules.

    Args:
        tuple_sent (list(tuple(str, str)))

    Returns:
        chunk_struct Tree('S', [Tree('CHUNK', [(str, str), (str, str)]), (str, str), ...]):
            chunked sentence
    """
    chunker = RegexpParser(self._ChunkingRule(self._CHUNK_RULE_VXP_))
    chunk_struct = chunker.parse(tuple_sent)
    return chunk_struct
def rule_based_reqs_chunk(tagged_reqs, ids):
    chunker = RegexpParser(ruleset)
    terms = []
    term_index = []
    for i, t in enumerate(tagged_reqs):
        s = chunker.parse(t)
        for c in s:
            if not isinstance(c, tuple):
                if c.label() == 'NP':
                    term = []
                    for tagged_word in c:
                        if (tagged_word[1] != 'DT') and (tagged_word[1] != 'PRP$'):
                            term = term + [tagged_word[0]]
                    terms.append(term)
                    term_index.append(i)
    return terms, term_index
def additionalExtractions(dep_triples, tagged_sentence, svo_triples):
    if not svo_triples:
        return None
    grammar = "SmallNP: {(<CD.*>|<JJ.*>)<NN.*>+}"
    cp = RegexpParser(grammar)
    chunk = cp.parse(tagged_sentence)
    triple_array = []
    for subtree in chunk.subtrees():
        if subtree.label() == 'SmallNP':
            for triple in svo_triples:
                pos = subtree.leaves()
                loc1 = tag_index(pos, triple[0])
                if loc1 != -1:
                    triple_array.extend(chunk_triples(pos, loc1))
                loc2 = tag_index(pos, triple[2])
                if loc2 != -1:
                    triple_array.extend(chunk_triples(pos, loc2))
    return triple_array
def preprocessing(self, desc):
    desc = desc.replace(",", " ")
    # Strip punctuation characters that carry no meaning for chunking
    for ch in "!@#%():{}`[]'*&^":
        desc = desc.replace(ch, "")
    print(desc)
    if "I/O" in desc:
        desc = desc.replace("I/O", "IO")
    desc = desc.replace("/", " and ")
    tokenized = nltk.word_tokenize(desc)
    posTag = nltk.pos_tag(tokenized)
    grammar = '''RB: {<RB> | <RBS> | <RBR>}'''
    chunker = RegexpParser(grammar)
    chunked = chunker.parse(posTag)
    print(chunked)
    for n in range(len(chunked)):
        if str(chunked[n]).startswith('(RB'):
            s = str(chunked[n]).split(" ")
            ss = s[1].split("/")
            removalWord = ss[0]
            if n == 0:
                desc = desc.replace(removalWord + " ", "")
            else:
                desc = desc.replace(" " + removalWord, "")
    return desc
def exctract_ngrams(self, tagged_sent):
    '''
    Extract ngrams, given a list of chunk rules, for the previously tagged sentence.

    Keyword arguments:
    @param tagged_sent: the POS tagged sentence whose ngrams need to be extracted
    '''
    chunker = RegexpParser(CHUNK_RULE)
    tree = chunker.parse(tagged_sent)
    ngrams = []
    for item in self.__leaves(tree):
        if not item == tagged_sent:
            probable_ngram = ' '.join(self.__stemmer.stem(word.lower())
                                      for (word, pos) in item)
            if self.__evaluate_polarity_ngram(probable_ngram):
                ngrams.append(probable_ngram)
    return ngrams
def get_noun_phrases(text_list, tagger):
    noun_phrases = []
    tagged_texts = [tagger.tag(text.split()) for text in text_list]
    expression = r'NOUN_PHRASE: {(<adj>* <n.*>+ <prp>)? <adj>* <n.*>+}'
    chunkParser = RegexpParser(expression)
    for tagged_sent in tagged_texts:
        try:
            if len(tagged_sent) == 0:
                tree = Tree('S', [])
            else:
                tree = chunkParser.parse(tagged_sent, trace=0)
            for subtree in tree.subtrees():
                if subtree.label() == "NOUN_PHRASE":
                    noun_phrases.append([el[0] for el in subtree.leaves()])
        except ValueError:
            # Skip sentences the chunker cannot parse instead of discarding
            # everything collected so far
            continue
    return noun_phrases
def chunking_noun(document):
    # Get the words in the document
    words = word_tokenize(document)
    tagged = nltk.pos_tag(words)
    counts = dict(Counter(tag for word, tag in tagged))
    chunkGram = r"""PHRASE: {(<JJ>* <NN.*>+ <IN>)? <JJ>* <NN.*>+}"""
    chunkParser = RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)
    search_keywords = []
    for tree in chunked.subtrees():
        if tree.label() == 'PHRASE':
            search_keyword = ' '.join([x for x, y in tree.leaves()])
            search_keywords.append(search_keyword)
    search_keywords = [w for w in search_keywords
                       if 1 < len(w.split(' ')) <= 3]
    return search_keywords, tagged, counts
def extract_candidate_phrases(document_obj, parts_of_speech_re=DEFAULT_RE):
    '''
    :param document_obj: document from which you want to extract parts of speech (candidate phrases)
    :param parts_of_speech_re: regular expression describing the part-of-speech structure
    :return: dict whose keys are sentence ids and whose values are lists of candidate phrases for that sentence
    '''
    candidate_phrases = {}
    # Create the regex parser from the regular expression over tags (once, reused for every sentence)
    regex_parser = RegexpParser(parts_of_speech_re)
    # Get the sentences of the document
    sentences = document_obj.get_sentences()
    for sentence in sentences:
        sentence_id = sentence.get_sentence_id()
        # Get tokens and build a list of (token, POS) tuples
        tokens_objs = sentence.get_tokens()
        token_pos_list = [(token_obj.get_token_str(), token_obj.get_token_pos())
                          for token_obj in tokens_objs]
        sentence_regex_tree = regex_parser.parse(token_pos_list)
        # Get all subtrees carrying the target label
        match_subtrees = sentence_regex_tree.subtrees(
            filter=lambda t: t.label() == STAGE_MARKER)
        sentence_candidate_phrases = []
        # Add candidate phrases
        for subtree in match_subtrees:
            leaves_str = ' '.join(
                [leaf_token_pos[0] for leaf_token_pos in subtree.leaves()])
            sentence_candidate_phrases.append(leaves_str)
        candidate_phrases[sentence_id] = sentence_candidate_phrases
    return candidate_phrases
def get_search_tags(a, verbose=False):
    if verbose:
        print()
        print('-' * 100)
        print("\tRunning `get_search_tags`...")
        print('-' * 100)
    search_tag_parser = RegexpParser(
        "STAG: {(<RB>|<RBR>|<RBS>|<VB>|<VB[A-Z]>|<IN>|<CC>)"
        "(<JJ>|<JJR>|<JJS>|<DT>)"
        "(<NN>|<NNS>|<NNP>|<NNPS>)+}"
    )
    pos_tags = pos_tag(word_tokenize(a))
    if verbose:
        print("Part of Speech Tags:", pos_tags, '\n')
    data = search_tag_parser.parse(pos_tags)
    if verbose:
        print("Matched Search Tags:", data)
    return extract_tags(data)
def extract_candidate_keywords(document):
    # Get the words in the document
    words = word_tokenize(document)
    # Chunk first to get candidate keywords
    tagged = nltk.pos_tag(words)
    chunkGram = r"""PHRASE: {(<JJ>* <NN.*>+ <IN>)? <JJ>* <NN.*>+}"""
    chunkParser = RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)
    candidate_keywords = []
    for tree in chunked.subtrees():
        if tree.label() == 'PHRASE':
            candidate_keyword = ' '.join([x for x, y in tree.leaves()])
            candidate_keywords.append(candidate_keyword)
    candidate_keywords = [w for w in candidate_keywords
                          if len(w) > 3 and len(w.split(' ')) < 6]
    return candidate_keywords
from util import sub_leaves

SINGLE_WORD_FREQ_CUT_OFF = 6

PATTERNS = r'''
NP: {<CD|VBN>?<NN.*|JJ.*>*<CD>?<NN.*|VBG><CD>?}
'''
PATTERNS_X = r'''
NP: {<NN.*|JJ.*|CD>*<NN.*|VBG><CD>?}
    {<NN.*|JJ.*>*<CD>?<NN.*|VBG><CD>?}
'''
PATTERNS_ALT = r'''
NP: {<NN.*|JJ.*>*<NN.*><CC><NN.*|VBG><CD>?}
    {<NN.*|JJ.*>*<CD>?<NN.*|VBG><CD>?}
'''
# e.g. ('2009', 'CD'), ('Grammy', 'NNP'), ('Awards', 'NNS')
NP_CHUNCKER = RegexpParser(PATTERNS)

EARLY_CANDIDATE_CUTOFF = 25
LATE_CANDIDATE_CUTOFF = 10


def extract_candidates(tagged_sentences):
    '''
    Returns three lists:
      - the candidate key concepts of the given document;
      - the candidate key concepts occurring early in the given document; and
      - the candidate key concepts occurring late in the given document.

    @param tagged_sentences: The POS tagged document.
    '''
    candidates = []
    early = set([])
# Regex-based shallow parser.
# The Tree structures used to represent parsed sentences in NLTK are converted
# to ChunkString objects here.
# Create a RegexpParser object from chunking and chinking rules
# (see the ChunkRule and ChinkRule classes).
sample_sentence = 'The brown fox is quick and he is jumping over the lazy dog'

# Create POS tagged tokens from the sample sentence
tagged_sentence = tag(sample_sentence)
print(tagged_sentence)

# Create the shallow parser
grammar = """
NP: {<DT>?<JJ>?<NN.*>}
ADJP: {<JJ>}
ADVP: {<RB.*>}
PP: {<IN>}
VP: {<MD>?<VB.*>+}
"""
rc = RegexpParser(grammar)

# Shallow parse the sample sentence
c = rc.parse(tagged_sentence)
print(c)

# Evaluate parser performance on test data
print(rc.evaluate(test_data))
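# A minimal sketch (an added illustration, not part of the snippet above) of the
# ChunkRule/ChinkRule classes mentioned in the comments: the same shallow-parsing
# idea expressed with explicit rule objects and RegexpChunkParser instead of a
# grammar string. Assumes NLTK and its tokenizer/tagger resources are available.
from nltk import pos_tag, word_tokenize
from nltk.chunk.regexp import ChunkRule, ChinkRule, RegexpChunkParser

tagged = pos_tag(word_tokenize('The brown fox is quick and he is jumping over the lazy dog'))

rules = [
    ChunkRule(r'<.*>+', 'Chunk everything into one NP'),
    ChinkRule(r'<VB.*|IN|CC|PRP>', 'Chink out verbs, prepositions, conjunctions and pronouns'),
]
chunk_parser = RegexpChunkParser(rules, chunk_label='NP')
print(chunk_parser.parse(tagged))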
def initialize(self, resources: Resources, configs: Config):
    super().initialize(resources, configs)
    self.chunker = RegexpParser(configs.pattern)
def _build_noun_chunker():
    """Build a noun chunker."""
    det_pos = r"(<DT|PRP\$?|CD>|<DT>?<NN.?><POS>)"
    np_chunk = "{{{}?<JJ|W.*>*<NN.*>+}}".format(det_pos)
    np_grammar = "NP: {}".format(np_chunk)
    return RegexpParser(np_grammar)
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import RegexpParser

tokenized_data = word_tokenize(dataset)
pos_tagging = pos_tag(tokenized_data)

chunk_sequence = """
chunk: {<NNPS>+}
       {<NNP>+}
       {<NN>}"""
chunk = RegexpParser(chunk_sequence)
chunked_data = chunk.parse(pos_tagging)
print(chunked_data)

"""## Named Entity Recognition

- Also known as:
  - Entity Identification
  - Entity Chunking
  - Entity Extraction
- A subtask of information extraction that classifies named entities into
  pre-defined categories such as names of persons, organizations, and locations
- Tesla: Organization, Elon Musk: Person

### Applications

- Classifying content for news providers
- Efficient search algorithms
- Content recommendation
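# A minimal NER sketch (an added illustration, not part of the snippet above),
# assuming NLTK's 'maxent_ne_chunker' and 'words' resources are installed.
# It shows how named entities such as persons and organizations can be chunked
# with nltk.ne_chunk on top of POS-tagged tokens.
from nltk import word_tokenize, pos_tag, ne_chunk

sentence = "Elon Musk founded Tesla in California."
ner_tree = ne_chunk(pos_tag(word_tokenize(sentence)))
for subtree in ner_tree.subtrees():
    if subtree.label() != 'S':
        entity = ' '.join(word for word, tag in subtree.leaves())
        print(subtree.label(), ':', entity)   # e.g. PERSON : Elon Musk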
train_data = data[:4000]
test_data = data[4000:]
print(train_data[7])

simple_sentence = 'the quick fox jumped over the lazy dog'

from nltk.chunk import RegexpParser
from pattern.en import tag

tagged_simple_sent = tag(simple_sentence)
print(tagged_simple_sent)

chunk_grammar = """
NP: {<DT>?<JJ>*<NN.*>}
"""
rc = RegexpParser(chunk_grammar)
c = rc.parse(tagged_simple_sent)
print(c)

chink_grammar = """
NP: {<.*>+}    # chunk everything as NP
    }<VBD|IN>+{
"""
rc = RegexpParser(chink_grammar)
c = rc.parse(tagged_simple_sent)
print(c)

tagged_sentence = tag(sentence)
print(tagged_sentence)

grammar = """
def __init__(self, setupData):
    super(RegexpChunker, self).__init__(setupData)
    self.chunker = RegexpParser(setupData)
def GetPatternsTree(tagsList, pattern, patternName):
    gramaticalAnalyse = RegexpParser(pattern)
    tree = gramaticalAnalyse.parse(tagsList)
    patt = ExtractPhrases(tree, patternName)
    return patt
sentence_token = [
    ''.join(c for c in s if c not in string.punctuation)
    for s in sentence_token
]
sentence_token = [s for s in sentence_token if s]
print(sentence_token)

# POS tagging, chunking and n-grams
def extract_ngrams(data, num):
    n_grams = ngrams(word_tokenize(data), num)
    return [' '.join(grams) for grams in n_grams]

for t in sentence_token:
    # POS tagging
    print(t)
    wordsList = word_tokenize(t)
    pos_tagged = pos_tag(wordsList)
    print("After POS-Tagging\n")
    print(pos_tagged)

    # Chunking
    chunker = RegexpParser(r"""Chunk: {<RB.?>*<VB.?>*<NNP>+<NN>?}""")
    output = chunker.parse(pos_tagged)
    print("After chunking", '\n')
    print(output)

    # 3-grams
    print("3 grams : ")
    print(extract_ngrams(t, 3))
def __init__(self, grammar="", loop=2): super(PostPatternStrategy, self).__init__() self.postChunker = RegexpParser(grammar, loop) self.grammar = grammar self.loop = loop
synonyms = []
for syn in wn.synsets('girl'):
    print(syn)
    for lemma in syn.lemmas():
        # A lemma is basically the dictionary (base) form of a word,
        # as opposed to its various inflected forms.
        print(lemma)
        synonyms.append(lemma.name())
synonyms

antonyms = []
for syn in wn.synsets("girl"):
    for l in syn.lemmas():
        if l.antonyms():
            antonyms.append(l.antonyms()[0].name())
antonyms

### chunking ###
from nltk import pos_tag
tags = pos_tag(tokens)
tags

from nltk.chunk import RegexpParser
grammar = "NP: {<DT>?<JJ>*<NN>}"
chunker = RegexpParser(grammar)
result = chunker.parse(tags)
result
farechunker = RegexpParser(r'''
    CARRIER: {<CODESHARE><CODESHARE><CODESHARE><CODESHARE>}
             {<CODESHARE><CODESHARE><CODESHARE>}
             {<CODESHARE><CODESHARE>}
             {<CODESHARE><NN>}
    ROUTE: {<ROUTE>}
    CABIN: {<CABIN>}
    RBD: {<BOOKING><CLASS>}
    CORPORATE_DISCOUNT: {<CORPORATE><DISCOUNT>}
                        {<EFFECTIVE><DISCOUNT>}
    AGENT_DISCOUNT: {<DISCOUNT>}
    FBC: {<FBC><VBD><TO><VBP><DISCOUNT>}
         {<FARE><BASIS>}
    TICKET_VALIDITY: {<TICKET><VALIDITY>}
    LOCATION: {<LOCATIONTYPE><NN><.*>}
    AIRLINE: {<CAT><PACIFIC><AIRWAYS><CITY>}
    CLIENT: <AIRLINE>{<.*><.*>}<TOURCODE>
''')
sentencas_treinadoras = mac_morpho.tagged_sents()[0:15000]
# Create the UnigramTagger based on the default tagger and train it with the
# tagged sentences from mac_morpho
etiq = UnigramTagger(sentencas_treinadoras, backoff=etiqPadrao)

coment = str(input("Enter the text: "))
if coment == "default":
    coment = open("default.txt", "r").read().replace("\n", " ")

# The text is converted into tokens
tokens = nltk.word_tokenize(coment.lower())
# Each token of the text is tagged
tags = etiq.tag(tokens)

# Create the regular-expression parser containing the patterns of interest
analiseGramatical = RegexpParser(r"""
    PADRAO7: {<N><ADJ>}
    PADRAO1: {<ADJ><N>(<PREP>?<N>)*}
    PADRAO2: {<ADV><ADV>?<ADJ>(<N>(<PREP>?<N>)*)?}
    PADRAO3: {<N>(<PREP>?<N>)*(<ADJ>)<ADV><ADV>?}
    PADRAO4: {<N>(<PREP>?<N>)*<ADV>?<ADJ>+}
    PADRAO5: {<ADV><V>}
    PADRAO6: {<V><ADV>}
""")
# The parser is then used to build the pattern tree
arvore = analiseGramatical.parse(tags)

x = [ExtractPhrases(arvore, "PADRAO1"), ExtractPhrases(arvore, "PADRAO2"),
     ExtractPhrases(arvore, "PADRAO3"), ExtractPhrases(arvore, "PADRAO4"),
     ExtractPhrases(arvore, "PADRAO5"), ExtractPhrases(arvore, "PADRAO6"),
     ExtractPhrases(arvore, "PADRAO7")]
for aux in range(len(x)):
    print("PADRAO 0" + str(aux + 1) + str(x[aux]))