def populate_ips_from_text_file(self):
    # Load the initial file
    file_content = self.get_text_from_file(file_path=self.file_path)

    # Instantiate the text parser that grabs the IPs
    text_parser = TextParser(raw_text=file_content)
    raw_ips = text_parser.list_of_ip_adresses_contained_in_raw_text()

    # Get env variables for the GeoIpService
    env_variable_getter = EnvVariableGetter()
    api_key = env_variable_getter.get_variable("api_key")
    api_url = env_variable_getter.get_variable("api_url")

    # Instantiate the GeoIp service, which is responsible for fetching IP geolocation
    geo_ip_service = GeoIpService(key=api_key, url=api_url)

    # Get the geo IP info using geo_ip_service, generate ip_models from the
    # response data, and store them in the list and the dict for further filtering
    for ip in raw_ips:
        geo_ip_response = geo_ip_service.get_geo_ip_info_for_ip(
            ip_address=ip, format="json")
        self.generate_and_store_ip_model(ip=ip, data=geo_ip_response.json())
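# A sketch of what generate_and_store_ip_model might look like, inferred from
# the loop comment above. IpModel, self.ip_models, and self.ip_models_by_address
# are assumptions for illustration, not part of the original code.
def generate_and_store_ip_model(self, ip, data):
    ip_model = IpModel(address=ip, geo_data=data)  # hypothetical model class
    self.ip_models.append(ip_model)                # ordered list of models
    self.ip_models_by_address[ip] = ip_model       # keyed by IP for fast filtering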
from text_parser import TextParser
import time

path = '/home/tim/Dropbox/Notes/journal.txt'

tp = TextParser(path)
tp.prepare_batches(128, 32, 4)
print(len(tp.vocab))

# Alternate between the cross-validation and training splits to check that
# switching splits does not corrupt the batch state
for i in range(10):
    tp.switch_split('cv')
    print(tp.get_next_feed_dict('a', 'b'))
    tp.switch_split('train')
    print(tp.get_next_feed_dict('a', 'b'))
    tp.switch_split('cv')
    print(tp.get_next_feed_dict('a', 'b'))
    tp.switch_split('train')
    print(tp.get_next_feed_dict('a', 'b'))

# Throughput stress test
for i in range(1000000):
    tp.get_next_feed_dict('a', 'b')
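# The otherwise-unused `import time` suggests the stress loop was meant to be
# timed; a minimal sketch of that measurement (an assumption about intent, not
# original code):
start = time.time()
for i in range(1000000):
    tp.get_next_feed_dict('a', 'b')
print('1,000,000 feed dicts in %.2f s' % (time.time() - start))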
from text_parser import TextParser  # import assumed; the original snippet omits it


def tokenize(text):
    tokenizer = TextParser(stopword_file='stopwords.txt')
    # Non-stemmed alternative: tokens = tokenizer.parse_words(text)
    tokens = tokenizer.parse_words(text, stem=True)
    return tokens
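# Example usage (illustrative; actual tokens depend on stopwords.txt and the
# stemmer behind parse_words):
# >>> tokenize('Running the test parsers')
# ['run', 'test', 'parser']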
def clean_words(self, word_parser):
    self.words = word_parser.parse_words(self.raw_message)

# Clean words for one message (NOT efficient)
def clean_words_pos(self, word_parser, preserve_types, ark_run_cmd):
    self.words = word_parser.parse_words_by_ark_nlp(
        self.raw_message, preserve_types, ark_run_cmd)

def set_clean_words(self, clean_words):
    # Deduplicating alternative: self.words = list(set(clean_words))
    self.words = clean_words

def remove_stopwords(self, stopword_set):
    trimmed_words = []
    for w in self.words:
        if w not in stopword_set:
            trimmed_words.append(w)
    self.words = trimmed_words


if __name__ == '__main__':
    wp = TextParser(min_length=2)
    m = Message(
        'hello, This is@ went octopi just a test for 12you!. Try it http://')
    preserve_types = ['V', 'N', '^']  # defined here, but the call below passes its own tag set
    ark_run_cmd = 'java -XX:ParallelGCThreads=2 -Xmx2G -jar /Users/chao/Dropbox/code/lib/ark-tweet-nlp-0.3.2.jar'
    m.clean_words(wp)
    print(m.words)
    m.clean_words_pos(wp, set(['S', 'N', '^']), ark_run_cmd)
    print(m.words)
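    # remove_stopwords is not exercised above; a minimal demonstration
    # (this stopword set is illustrative, not from the original):
    m.remove_stopwords(set(['just', 'test']))
    print(m.words)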