Example #1
def tokenize_merge(row):
    allwords = []
    for text in row.iloc[1:].dropna():
        # strip leading bytes-literal debris such as b'... or b"...
        # (lstrip takes a character set, so this drops any leading b, ' or ")
        text = text.lstrip("b'\"")
        s = Sentence.from_raw(text, StopWords, neg_mark=True)
        allwords += s.words

    print(allwords)  # show progress
    return allwords
Example #2
def test_negation_suffix():
    stopwords = common.make_stop_words()
    sentences = ["I don't like Beijing 123, because it's too expensive",
                 "I cannot 4 run away 56, since I am a grown man",
                 "never ever come back again, I swear to god",
                 "without any problem",
                 "I don't think I will enjoy it: it might be too spicy"]
    for index, raw_sent in enumerate(sentences):
        sentence = Sentence.from_raw(raw_sent, stopwords)
        print("\n=========================== [{}]".format(index + 1))
        print(sentence.raw)
        print(sentence.words)
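None of the examples show what the negation handling inside Sentence.from_raw actually does. A rough sketch of the usual technique follows; the _NEG suffix, the cue list, and the punctuation-bounded scope rule are assumptions for illustration, not taken from these projects:

import re

NEGATION_RE = re.compile(r"(?:not|no|never|without|n't)$")
CLAUSE_BREAK = {",", ".", ";", ":", "!", "?"}

def mark_negation(tokens):
    # append a _NEG suffix to every token between a negation cue and the
    # next clause-level punctuation mark
    marked, in_scope = [], False
    for tok in tokens:
        if tok in CLAUSE_BREAK:
            in_scope = False      # punctuation closes the negation scope
            marked.append(tok)
        elif NEGATION_RE.search(tok):
            in_scope = True       # the cue word itself stays unmarked
            marked.append(tok)
        else:
            marked.append(tok + "_NEG" if in_scope else tok)
    return marked

# mark_negation("i do n't like beijing , because it 's too expensive".split())
# -> ['i', 'do', "n't", 'like_NEG', 'beijing_NEG', ',', 'because',
#     'it', "'s", 'too', 'expensive']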
Example #3
def preproc_save_sentences(filename, raw_sent_stream, extra_stopwords=None):
    stop_words = set(stopwords.words("english"))
    if extra_stopwords is not None:
        stop_words |= set(extra_stopwords)

    with open(filename, "wt") as outf:
        outf.write("[")

        written = 0
        for index, raw_sent in enumerate(raw_sent_stream):
            sentence = Sentence.from_raw(raw_sent, stop_words)
            if len(sentence.words) > 0:
                # only the first written entry gets no leading comma;
                # keying this off `index` would break when a sentence is skipped
                outf.write('\n' if written == 0 else ',\n')
                outf.write(sentence.dump_json())
                written += 1
                print("{}-th sentence processed and saved".format(index + 1))

        outf.write("\n]")
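A minimal driver for this function; the filename, sentences, and extra stop words below are invented for illustration:

raw_sentences = iter(["I don't like it, it is too expensive",
                      "without any problem"])
preproc_save_sentences("sentences.json", raw_sentences,
                       extra_stopwords=["beijing", "pullman"])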
Example #4
def test_sentence():
    stopwords = text_utility.make_stop_words()

    texts = [
        "can't is a contraction", "she isn't my wife any more",
        "I am not in USA right now", "I'm a Chinese",
        "1630 NE Valley Rd, Pullman, WA, 99163, Apt X103",
        "I should've done that thing I didn't do", "I don't love her any more",
        "I want to divorce without hesitation", "bye, Pullman, bye, USA"
    ]

    for index, text in enumerate(texts):
        sent = Sentence.from_raw(text, stopwords, True)
        print("\n******************** {}".format(index + 1))

        print(sent.raw)
        print("===>")
        print(sent.words)
Example #5
def print_topics(txt):
    sentence = Sentence.from_raw(txt, stop_words)
    print("\n{}\n".format(sentence.raw))

    coded_words = wordcoder.code(sentence.words)
    bow = dictionary.doc2bow(coded_words)

    # topics for this document, sorted by descending weight
    topic_distribution = list(lda_model[bow])
    topic_distribution.sort(key=lambda t: t[1], reverse=True)

    # merge all weighted topics into a single normalized tag set
    tags = None
    for topic_id, topic_percentage in topic_distribution:
        mt = MixTopic(topic_mapping[topic_id])
        mt.weight(topic_percentage)

        if tags is None:
            tags = mt
        else:
            tags.add(mt)

    tags.normalize()
    print(tags)
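print_topics relies on module-level state (stop_words, wordcoder, dictionary, topic_mapping, and a trained gensim-style lda_model); once those are loaded, a call is just one line (the review text here is invented):

print_topics("The noodles were great, but I don't like the slow service")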
Example #6
def update_add_neg_suffix(dbname, query_condition):
    stop_words = common.make_stop_words()
    client = MongoClient()
    review_collection = client[dbname]['reviews']

    cursor = review_collection.find(query_condition, {"sentences.raw": 1, "sentences.words": 1})
    for rindex, rd in enumerate(cursor):
        review = Review.from_dict(rd)

        # re-tokenize each sentence and collect only the ones whose words changed
        update_content = {}
        for sindex, sent in enumerate(review.sentences):
            new_sent = Sentence.from_raw(sent.raw, stop_words)
            if set(new_sent.words) != set(sent.words):
                update_content["sentences.{}.words".format(sindex)] = new_sent.words

        if len(update_content) > 0:
            result = review_collection.update_one({"_id": review.id}, {"$set": update_content})
            if result.modified_count != 1:
                raise Exception("failed to update review<{}>".format(review.id))

        print("{}-th review updated {} sentences".format(rindex + 1, len(update_content)))

    client.close()
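A call needs only a database name and a MongoDB filter document; both arguments below are made up for illustration:

update_add_neg_suffix("yelp_reviews", {"sentences": {"$exists": True}})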
Example #7
def __init__(self, id=None, text=None, is_positive=None):
    self.id = id
    self.sent = None if text is None else Sentence.from_raw(
        text, Review.StopWords, neg_mark=True)
    if self.sent is not None:
        self.sent.sentiment = is_positive
Example #8
def assign_comment(self, text, stop_words):
    self.sentences = []
    # split the comment into sentences and keep only those that still
    # have words after stop-word removal
    for raw_sentence in Review.SentTokenizer.tokenize(text):
        sent = Sentence.from_raw(raw_sentence, stop_words)
        if len(sent.words) > 0:
            self.sentences.append(sent)
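All of the examples lean on the same small surface of the Sentence class without ever defining it. A minimal sketch of the interface they collectively assume follows; everything here is inferred from the call sites above, not taken from the original project, and the tokenization is deliberately naive (the real code almost certainly uses NLTK):

import json

class Sentence(object):
    # minimal stand-in for the Sentence class used in every example

    def __init__(self, raw, words):
        self.raw = raw            # original text, printed by the tests
        self.words = words        # cleaned tokens, what callers consume
        self.sentiment = None     # assigned externally in Example #7

    @classmethod
    def from_raw(cls, raw, stop_words, neg_mark=False):
        tokens = [t.strip(".,:;!?").lower() for t in raw.split()]
        words = [t for t in tokens if t and t not in stop_words]
        if neg_mark:
            words = mark_negation(words)  # sketch shown after Example #2
        return cls(raw, words)

    def dump_json(self):
        # Example #3 concatenates these dumps into a hand-built JSON array
        return json.dumps({"raw": self.raw, "words": self.words})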