def timexTagText(self, altText=None):
    """Tag all temporal expressions with inline <TIMEX2> XML tags.

    Args:
        altText (str, optional): Text to tag instead of the narrative
            file. When given, the result is NOT cached in
            ``self.textList``.

    Returns:
        str: The tagged text.
    """
    # Ad-hoc text: tag it directly; nothing is cached.
    if altText is not None:
        return timex.tag(altText)

    # Narrative text: compute once, cache in the textList dict, reuse after.
    if self.textList.get('timexTagText') is None:
        # 'with' guarantees the file is closed even if timex.tag raises
        # (the original leaked the handle on an exception), and the file
        # is no longer opened at all on a cache hit.
        with open(self.filename) as source:
            raw = source.read()
        self.textList['timexTagText'] = timex.tag(raw)
    return self.textList.get('timexTagText')
def namedEntityRecognizer():
    """Run NER (plus TIMEX date tagging and optional unit extraction)
    over the incoming request body.

    Returns:
        str: JSON document with keys ``result``, ``names`` and, when the
        ``Units`` flag is enabled, ``units``.
    """
    echo2("Performing NER on incoming stream")
    content = request.stream.read()
    if Verbose:
        echo2("Incoming content is " + content)
    start = time.time()

    # Temporal expressions first, then classic NLTK NE chunking.
    date_time = timex.tag(content)
    tokenized = nltk.word_tokenize(content)
    tagged = nltk.pos_tag(tokenized)
    namedEnt = nltk.ne_chunk(tagged, binary=True)
    names = extract_entity_names(namedEnt, 'NE')
    names.extend(date_time)
    result = {"result": "success", "names": names}

    if Units:
        # Shallow grammar: a cardinal number optionally followed by
        # nouns/adjective, e.g. "5 kilometers".
        grammar = '''unit: {<CD><NNS>?<NN.*>?},
                     unit: {<CD><JJ>?<NN.*>}
                  '''
        parser = nltk.RegexpParser(grammar)
        units = extract_entity_names(parser.parse(tagged), 'unit')
        result['units'] = units

    jsonDoc = json.dumps(result, sort_keys=True, indent=4,
                         separators=(',', ': '))
    end = time.time()
    # Parenthesized print of a single argument is valid in both
    # Python 2 and 3; the bare print statement was 2-only.
    print("NER took " + str(end - start) + " seconds")
    return jsonDoc
def extract_temporal_from_doc(doc):
    """Extract 4-digit year tags from a document's content/title fields.

    Args:
        doc (dict): Solr-style document; ``content`` and ``title`` are
            assumed to be lists of strings — TODO confirm, since a plain
            string would be iterated character-by-character by ``+=``.

    Returns:
        dict: Deep copy of ``doc`` minus Solr bookkeeping keys
        (``_version_``, ``boost``), with an ``extracted_dates`` list
        added when any year tags were found.
    """
    enriched = copy.deepcopy(doc)
    # Drop Solr-internal fields that must not be re-indexed.
    enriched.pop('_version_', None)
    enriched.pop('boost', None)

    master_list = []
    if 'content' in doc:
        master_list += doc['content']
    if 'title' in doc:
        master_list += doc['title']

    extracted_date_list = []
    # Track raw tags already processed. The original compared raw tags
    # against the *formatted* date strings in extracted_date_list, so it
    # never actually deduplicated.
    seen_tags = set()
    for content in master_list:
        for tag in timex.tag(content):
            if tag in seen_tags:
                continue
            seen_tags.add(tag)
            if tag.isdigit() and len(tag) == 4:
                # Bare 4-digit year -> canonical date string.
                datestring_tag = get_date_string_from_ymd(year=int(tag))
                extracted_date_list.append(datestring_tag)
            # TODO: don't do anything for now because Solr doesn't accept
            # non formatted date strings (so make function to get a date
            # format for the string tag)
            # else:
            #     extracted_date_list.append(tag)

    if extracted_date_list:
        enriched['extracted_dates'] = extracted_date_list
    return enriched
def __init__(self, query_string):
    """Analyze *query_string*: sentence-split, tokenize, POS-tag,
    NE-chunk (binary and multiclass), and ground its temporal
    expressions against the current GMT time."""
    self.query_string = query_string
    raw_sentences = nltk.sent_tokenize(query_string)
    self.tokenized_sentences = list(map(nltk.word_tokenize, raw_sentences))
    self.tagged_sentences = list(map(nltk.pos_tag, self.tokenized_sentences))
    self.binary_chunked_sentences = nltk.batch_ne_chunk(
        self.tagged_sentences, binary=True)
    self.multiclass_chunked_sentences = nltk.batch_ne_chunk(
        self.tagged_sentences, binary=False)
    self.temporal_sentences = timex.ground(
        timex.tag(query_string), mx.DateTime.gmt())
def timexTagText(self, altText=None):
    """Tag every temporal expression and wrap it in an inline <TIMEX2>
    XML tag.

    Args:
        altText (str) The text to be tagged, if it is not the same as
        the whole narrative the preprocessor was created with. This
        text won't be stored.

    Returns:
        tagged text (str)
    """
    # One-off text is tagged immediately and never cached.
    if altText is not None:
        return timex.tag(altText)
    # Full narrative: populate the shared class-level cache on first use.
    narrative = self.rawText()
    if Preprocessor.textList.get('timexTagText') is None:
        Preprocessor.textList['timexTagText'] = timex.tag(narrative)
    return Preprocessor.textList.get('timexTagText')
def __init__(self, query_string):
    """Run the full NLP pipeline over *query_string*: tokenization,
    POS tagging, binary and multiclass NE chunking, and temporal
    grounding against the current GMT time."""
    self.query_string = query_string
    sentence_list = nltk.sent_tokenize(query_string)
    tokenized = [nltk.word_tokenize(sent) for sent in sentence_list]
    tagged = [nltk.pos_tag(tokens) for tokens in tokenized]
    self.tokenized_sentences = tokenized
    self.tagged_sentences = tagged
    self.binary_chunked_sentences = nltk.batch_ne_chunk(tagged, binary=True)
    self.multiclass_chunked_sentences = nltk.batch_ne_chunk(tagged,
                                                            binary=False)
    self.temporal_sentences = timex.ground(timex.tag(query_string),
                                           mx.DateTime.gmt())
def namedEntityRecognizer():
    """Run NER plus TIMEX date tagging over the incoming request body.

    Returns:
        str: JSON document with keys ``result`` and ``names``.
    """
    echo2("Performing NER on incoming stream")
    content = request.stream.read()
    if Verbose:
        echo2("Incoming content is " + content)
    start = time.time()
    # Temporal expressions first, then classic NLTK NE chunking.
    date_time = timex.tag(content)
    tokenized = nltk.word_tokenize(content)
    tagged = nltk.pos_tag(tokenized)
    namedEnt = nltk.ne_chunk(tagged, binary=True)
    names = extract_entity_names(namedEnt)
    names.extend(date_time)
    result = {"result": "success", "names": names}
    jsonDoc = json.dumps(result, sort_keys=True, indent=4,
                         separators=(',', ': '))
    end = time.time()
    # Parenthesized print of a single argument is valid in both
    # Python 2 and 3; the bare print statement was 2-only.
    print("NER took " + str(end - start) + " seconds")
    return jsonDoc
def namedEntityRecognizer():
    """Run NER, TIMEX date tagging, and optional unit extraction over
    the incoming request body.

    Returns:
        str: JSON document with keys ``result``, ``names`` and, when the
        ``Units`` flag is enabled, ``units``.
    """
    echo2("Performing NER on incoming stream")
    content = request.stream.read()
    if Verbose:
        echo2("Incoming content is " + content)
    start = time.time()

    # Temporal expressions first, then classic NLTK NE chunking.
    date_time = timex.tag(content)
    tokenized = nltk.word_tokenize(content)
    tagged = nltk.pos_tag(tokenized)
    namedEnt = nltk.ne_chunk(tagged, binary=True)
    names = extract_entity_names(namedEnt, 'NE')
    names.extend(date_time)
    result = {"result": "success", "names": names}

    if Units:
        # Shallow grammar: a cardinal number optionally followed by
        # nouns/adjective, e.g. "5 kilometers".
        grammar = '''unit: {<CD><NNS>?<NN.*>?},
                     unit: {<CD><JJ>?<NN.*>}
                  '''
        parser = nltk.RegexpParser(grammar)
        units = extract_entity_names(parser.parse(tagged), 'unit')
        result['units'] = units

    jsonDoc = json.dumps(result, sort_keys=True, indent=4,
                         separators=(',', ': '))
    end = time.time()
    # Parenthesized print of a single argument is valid in both
    # Python 2 and 3; the bare print statement was 2-only.
    print("NER took " + str(end - start) + " seconds")
    return jsonDoc