def test_search_function(self):
     # Assert search() function.
     s = Sentence(parse("Go on Bors, chop his head off!"))
     m = search.search("PRP*? NN*", s)
     self.assertEqual(m[0].string, "Bors")
     self.assertEqual(m[1].string, "his head")
     print("pattern.search.search()")
Example #2
 def test_document(self):
     # Assert Document properties.
     # Test with different input types.
     for constructor, w in (
             (vector.Document, "The cats sit on the mat."),
             (vector.Document, ["The", "cats", "sit", "on", "the", "mat"]),
             (vector.Document, {"cat": 1, "mat": 1, "sit": 1}),
             (vector.Document, Text(parse("The cats sat on the mat."))),
             (vector.Document, Sentence(parse("The cats sat on the mat.")))):
         # Test copy.
         v = constructor(w,
                         stemmer=vector.LEMMA,
                         stopwords=False,
                         name="Cat",
                         type="CAT")
         v = v.copy()
         # Test properties.
         self.assertEqual(v.name, "Cat")
         self.assertEqual(v.type, "CAT")
         self.assertEqual(v.count, 3)
         self.assertEqual(v.terms, {"cat": 1, "mat": 1, "sit": 1})
         # Test iterator decoration.
         self.assertEqual(sorted(v.features), ["cat", "mat", "sit"])
         self.assertEqual(sorted(v), ["cat", "mat", "sit"])
         self.assertEqual(len(v), 3)
         self.assertEqual(v["cat"], 1)
         self.assertEqual("cat" in v, True)
     print "pattern.vector.Document"
 def test_match(self):
     # Assert Constraint-Word matching.
     R = search.Constraint.fromstring
     S = lambda s: Sentence(parse(s, relations=True, lemmata=True))
     W = lambda s, tag=None, index=0: search.Word(None, s, tag, index)
     for constraint, tests in (
       (R("cat|dog"),  [(W("cat"), 1), (W("dog"), 1), (W("fish"), 0)]),
       (R("cat*"),     [(W("cats"), 1)]),
       (R("*cat"),     [(W("tomcat"), 1)]),
       (R("c*t|d*g"),  [(W("cat"), 1), (W("cut"), 1), (W("dog"), 1), (W("dig"), 1)]),
       (R("cats|NN*"), [(W("cats", "NNS"), 1), (W("cats"), 0)]),
       (R("^cat"),     [(W("cat", "NN", index=0), 1),(W("cat", "NN", index=1), 0)]),
       (R("*|!cat"),   [(W("cat"), 0), (W("dog"), 1), (W("fish"), 1)]),
       (R("my cat"),   [(W("cat"), 0)]),
       (R("my cat"),   [(S("my cat").words[1], 1)]),  # "my cat" is an overspecification of "cat"
       (R("my_cat"),   [(S("my cat").words[1], 1)]),
       (R("cat|NP"),   [(S("my cat").words[1], 1)]),
       (R("dog|VP"),   [(S("my dog").words[1], 0)]),
       (R("cat|SBJ"),  [(S("the cat is sleeping").words[1], 1)]),
       (R("dog"),      [(S("MY DOGS").words[1], 1)]), # lemma matches
       (R("dog"),      [(S("MY DOG").words[1], 1)])): # case-insensitive
         for test, b in tests:
             self.assertEqual(constraint.match(test), bool(b))
     # Assert Constraint-Taxa matching.
     t = search.Taxonomy()
     t.append("Tweety", type="bird")
     t.append("Steven", type="bird")
     v = search.Constraint.fromstring("BIRD", taxonomy=t)
     self.assertTrue(v.match(W("bird")))
     self.assertTrue(v.match(S("tweeties")[0]))
     self.assertTrue(v.match(W("Steven")))
     print("pattern.search.Constraint.match()")
Example #4
def myExtract(statement):

  s = Sentence(parse(statement, relations=True, lemmata=True, light=True))
  p = Pattern.fromstring('There be DT NN+')
  match = p.search(s)
  #raise Exception(match)
  return match
Example #5
def basicExtract(statement):

  #s = Sentence(parse(statement, relations=True, lemmata=True, light=True))
  #p = Pattern.fromstring('(DT) (RB) (JJ) NN+')
  s = Sentence(parse(statement, lemmata=True))
  m = search("There be DT {JJ? NN}", s)
  return m
 def test_match(self):
     # Assert Match properties.
     s = Sentence(parse("Death awaits you all with nasty, big, pointy teeth."))
     p = search.Pattern(sequence=[
         search.Constraint(tags=["JJ"], optional=True),
         search.Constraint(tags=["NN*"])])
     m = p.search(s)
     self.assertTrue(isinstance(m, list))
     self.assertEqual(m[0].pattern, p)
     self.assertEqual(m[1].pattern, p)
     self.assertEqual(m[0].words, [s.words[0]])
     self.assertEqual(m[1].words, [s.words[-3], s.words[-2]])
     # Assert constraint "NN*" links to "Death" and "teeth", and "JJ" to "pointy".
     self.assertEqual(m[0].constraint(s.words[ 0]), p[1])
     self.assertEqual(m[1].constraint(s.words[-3]), p[0])
     self.assertEqual(m[1].constraint(s.words[-2]), p[1])
     # Assert constraints "JJ NN*" link to the chunk "pointy teeth".
     self.assertEqual(m[1].constraints(s.chunks[-1]), [p[0], p[1]])
     # Assert Match.constituents() by constraint, constraint index and list of indices.
     self.assertEqual(m[1].constituents(), [s.words[-3], s.words[-2]])
     self.assertEqual(m[1].constituents(constraint=p[0]), [s.words[-3]])
     self.assertEqual(m[1].constituents(constraint=1), [s.words[-2]])
     self.assertEqual(m[1].constituents(constraint=(0,1)), [s.words[-3], s.words[-2]])
     # Assert Match.string.
     self.assertEqual(m[1].string, "pointy teeth")
     print("pattern.search.Match")
Example #7
def test_sentence():
    from pattern.en import parse, Text, Sentence
    from pattern.en import pprint 
    
    sent1 = "BS degree ( BSEE or BSCS strongly preferred , MSCS a plus ) and/or the equivalent in training and experience ."
    sent2 = "Bachelor's degree in Computer Science is required."  
    sent3 = "He created the robot and broke it after making it."
    sent4 = "A Computer Science or related degree "    
    sent5 = "bachelors degree in Computer Science or Information Systems and/or related experience required"    
    
    result = parse(sent5,
         tokenize = True,  # Tokenize the input, i.e. split punctuation from words.
             tags = True,  # Find part-of-speech tags.
           chunks = True,  # Find chunk tags, e.g. "the black cat" = NP = noun phrase.
        relations = True,  # Find relations between chunks.
          lemmata = True,  # Find word lemmata.
            light = True)
    pprint(result)
   
    sen = Sentence(result)
    # print(type(sen))
    print(sen)

    for chunk in sen.chunks:
        print(chunk.type, [(w.string, w.type) for w in chunk.words])
Example #8
 def test_match_function(self):
     # Assert match() function.
     s = Sentence(parse("Go on Bors, chop his head off!"))
     m1 = search.match("chop NP off", s, strict=False)
     m2 = search.match("chop NP+ off", s, strict=True)
     self.assertEqual(m1.constituents()[1].string, "his head")
     self.assertEqual(m2.constituents()[1].string, "his head")
     print "pattern.search.match()"
Example #9
def modality(sentence, type=EPISTEMIC):
    """ Returns the sentence's modality as a weight between -1.0 and +1.0.
        Currently, the only type implemented is EPISTEMIC.
        Epistemic modality is used to express possibility (i.e. how truthful is what is being said).
    """
    if isinstance(sentence, basestring):
        try:
            # A Sentence is expected but a string given.
            # Attempt to parse the string on-the-fly.
            from pattern.en import parse, Sentence
            sentence = Sentence(parse(sentence))
        except ImportError:
            pass
    S, n, m = sentence, 0.0, 0
    if not (hasattr(S, "words") and hasattr(S, "parse_token")):
        raise TypeError("%s object is not a parsed Sentence" %
                        repr(S.__class__.__name__))
    if type == EPISTEMIC:
        r = S.string.rstrip(" .!")
        for k, v in epistemic_weaseling.items():
            for phrase in v:
                if phrase in r:
                    n += k
                    m += 2
        for i, w in enumerate(S.words):
            for type, dict, weight in (("MD", epistemic_MD,
                                        4), ("VB", epistemic_VB,
                                             2), ("RB", epistemic_RB,
                                                  2), ("JJ", epistemic_JJ, 1),
                                       ("NN", epistemic_NN,
                                        1), ("CC", epistemic_CC_DT_IN,
                                             1), ("DT", epistemic_CC_DT_IN, 1),
                                       ("IN", epistemic_CC_DT_IN,
                                        1), ("PRP", epistemic_PRP,
                                             1), ("PRP$", epistemic_PRP, 1),
                                       ("WP", epistemic_PRP, 1)):
                # "likely" => weight 1, "very likely" => weight 2
                if i > 0 and s(S[i - 1]) in MODIFIERS:
                    weight += 1
                # likely" => score 0.25 (neutral inclining towards positive).
                if w.type and w.type.startswith(type):
                    for k, v in dict.items():
                        # Prefer lemmata.
                        if (w.lemma or s(w)) in v:
                            # Reverse score for negated terms.
                            if i > 0 and s(S[i - 1]) in ("not", "n't", "never",
                                                         "without"):
                                k = -k * 0.5
                            n += weight * k
                            m += weight
                            break
            # Numbers, citations, explanations make the sentence more factual.
            if w.type in ("CD", "\"", "'", ":", "("):
                n += 0.75
                m += 1
    if m == 0:
        return 1.0  # No modal verbs/adverbs used, so statement must be true.
    return max(-1.0, min(n / (m or 1), +1.0))
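A minimal usage sketch for modality() (assumes pattern.en is installed; the sentences and the score comments are illustrative):

from pattern.en import parse, Sentence, modality

s = Sentence(parse("I think we could possibly win.", lemmata=True))
print(modality(s))  # lower score: hedging words ("think", "could", "possibly")

s = Sentence(parse("The cat sits on the mat.", lemmata=True))
print(modality(s))  # 1.0: no modality markers at all, so the statement is treated as fact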
def extractMood(characterSentences):
    """
    Analyzes the sentence using grammatical mood module from pattern.
    """
    characterMoods = defaultdict(list)
    for key, value in characterSentences.items():
        for x in value:
            characterMoods[key].append(
                mood(Sentence(parse(str(x), lemmata=True))))
    return characterMoods
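A hypothetical call (the key and quotes below are illustrative, not from the original code; it assumes the same imports the module above relies on, i.e. defaultdict, parse, Sentence and mood):

characterSentences = {"Teacher": ["Do your homework!", "The sky is blue."]}
print(extractMood(characterSentences))
# e.g. defaultdict(list, {'Teacher': ['imperative', 'indicative']})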
Example #11
 def test_convergence(self):
     # Test with random sentences and random patterns to see if it crashes.
     w = ("big", "white", "rabbit", "black", "cats", "is", "was", "going", "to", "sleep", "sleepy", "very", "or")
     x = ("DT?", "JJ?+", "NN*", "VP?", "cat", "[*]")
     for i in range(100):
         s = " ".join(random.choice(w) for i in range(20))
         s = Sentence(parse(s, lemmata=True))
         p = " ".join(random.choice(x) for i in range(5))
         p = search.Pattern.fromstring(p)
         p.search(s)
Example #12
 def start(self):
     cloudSize = dameCloudSize(self.id_request)
     cloudSize = cloudSize[0][0]
     searchKey = dameSerchKey(self.id_request)
     searchKey = searchKey[0][0]
     step = 0
     while step <= 5:  # Set get_stop later on; this indicates the number of levels
         for id_cloud in dameIdCloud(self.id_request):  # Get the IDs of the clouds that belong to the project
             print("Id Cloud: " + str(id_cloud[0]))
             cloud = self.generar_cloud(dameNodo(id_cloud[0]))
             true_nodes = self.trueNodesSelection(cloud)
             for n in true_nodes:
                 try:
                     cloud.graph.node[n]['select'] = False
                     crawler = SimpleCrawler1(n, delay=0.1)
                     crawler.newStructure(cloud.graph)
                     time = 0
                 except:
                     continue
                 while len(crawler.visited) < cloudSize:
                     print("Cloudsize = " + str(cloudSize) +
                           " Crawler Visited = " + str(len(crawler.visited)) +
                           " Level = " + str(step))
                     print('Exploring ...')
                     crawler.crawl(method=None)
                     time += 1
                     if time > cloudSize * 10:
                         break
                 actualizarSelect(cloud.graph.node[n]['ID'],
                                  cloud.graph.node[n]['select'])
                 print()
                 print('##### Generating documents #####')
                 # Creation of the minePackage
                 clouds = list()
                 clouds.append(cloud)
                 minePackage = dict()
                 minePackage['clouds'] = clouds
                 minePackage['searchKey'] = searchKey
                 minePackage['searchKeyStemmer'] = count(
                     words(Sentence(parse(searchKey))), stemmer=PORTER)
                 self.IRController.start(minePackage)  # Retrieves information
                 # MISSING: scraper controller
             # Set to None so they don't take up unnecessary space; everything has already been saved to the DB
             minePackage = None
             cloud = None
             gc.collect()
         step += 1
         print "Explorando nivel nro: " + str(step)
         #Controla los niveles a expandir, en este caso 10
     print "Proceso Finalizado"
def findVerb(sent):
    result = parse(
        sent,
        tokenize=True,
        tags=True,
    )
    sen = Sentence(result)
    vlist = [word.string for word in sen if word.type.startswith("V")]
    print(vlist)
    return vlist
 def nouns_and_adjectives(self, results):
     nouns = []
     adjectives = []
     results_tree = parse(results, chunks=False)
     sentence = Sentence(results_tree)
     for word in sentence:
         if word.type == 'NN':
             nouns.append(word.string)
         elif word.type == 'JJ':
             adjectives.append(word.string)
     return nouns, adjectives
Example #15
 def run(self, minePackage):
     clouds = minePackage['clouds']
     urlContent = UrlToPlainText()
     for cloud in clouds:
         for n in cloud.graph.nodes():  # Iterate over the list of links in the cloud
             print(cloud.graph.node[n]['link'])
             pageContent = urlContent.plainTextConverter(
                 cloud.graph.node[n]['link'])
             cloud.graph.node[n]['methodData'] = MethodData(
                 count(words(Sentence(parse(pageContent))), stemmer=PORTER))
def Sentlist_tokenizedS(text_tokenizedS):
    
    from pattern.en import Sentence, parse
    
    print("Processing: tokenizing text by sentence")
    Sent_list = []
    for e in text_tokenizedS:
        s = parse(e, lemmata=False, chunks = True)
        s = Sentence(s)
        Sent_list.append(s)
    print("Completed:  text tokenized by sentence")
    return Sent_list
Example #17
 def __init__(self, data, url="", contenidoBd=""):
     if url != "":
         urlContent = UrlToPlainText()
         self.contenidoConEtiquetas = urlContent.plainTextConverter(
             url, "mantenerEtiquetas")
         self.contenido = plaintext(self.contenidoConEtiquetas, keep={})
     else:
         if (contenidoBd != ""):
             self.contenidoConEtiquetas = contenidoBd
             self.contenido = plaintext(self.contenidoConEtiquetas, keep={})
         else:
             self.contenido = ""
     self.data = count(words(Sentence(parse(self.contenido))),
                       stemmer=PORTER)
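Several of the snippets above build a term-frequency dictionary with count(words(Sentence(parse(text))), stemmer=PORTER). A rough illustration of that idiom (hedged: the exact keys depend on pattern's tokenizer, its default filtering and the Porter stemmer):

from pattern.vector import count, words, PORTER
from pattern.en import parse, Sentence

freq = count(words(Sentence(parse("Cats chase other cats."))), stemmer=PORTER)
print(freq)  # a dict mapping stemmed word forms to counts, e.g. {'cat': 2, ...}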
Example #18
 def test_group(self):
     # Assert Match groups.
     s = Sentence(parse("the big black cat eats a tasty fish"))
     m = search.search("DT {JJ+} NN", s)
     self.assertEqual(m[0].group(1).string, "big black")
     self.assertEqual(m[1].group(1).string, "tasty")
     # Assert nested groups (and syntax with additional spaces).
     m = search.search("DT { JJ { JJ { NN }}}", s)
     self.assertEqual(m[0].group(1).string, "big black cat")
     self.assertEqual(m[0].group(2).string, "black cat")
     self.assertEqual(m[0].group(3).string, "cat")
     # Assert chunked groups.
     m = search.search("NP {VP NP}", s)
     v = m[0].group(1, chunked=True)
     self.assertEqual(v[0].string, "eats")
     self.assertEqual(v[1].string, "a tasty fish")
     print "pattern.search.Match.group()"
Example #19
 def getData(self, params):
     if self.now_cache is not None:
         if (self.now_cache +
                 datetime.timedelta(minutes=5)) < datetime.datetime.now():
             self.data_cache = None
             self.today_cache = None
             self.now_cache = None
     if self.data_cache is None:
         tweets = []
         for cand in candidates:
             tweets.append({
                 'tweets': api.user_timeline(cand['user'], count=20),
                 'name': cand['name'],
                 'party': cand['party']
             })
         all_tweets = []
         for tweet_data in tweets:
             name = tweet_data['name']
             party = tweet_data['party']
             for tweet in tweet_data['tweets']:
                 all_tweets.append({
                     'Name': name,
                     'Tweet': tweet.text,
                     'Favorites': tweet.favorite_count,
                     'Retweets': tweet.retweet_count
                 })
         dfs = pd.DataFrame(all_tweets)
         sentiments = [sentiment(tweet) for tweet in dfs['Tweet']]
         dfs['Polarity'] = [sent[0] for sent in sentiments]
         dfs['Subjectivity'] = [sent[1] for sent in sentiments]
         modal = [
             modality(Sentence(parse(tweet, lemmata=True)))
             for tweet in dfs['Tweet']
         ]
         dfs['Certainty'] = modal
         today = date.strftime(datetime.datetime.now(),
                               format='%m/%d/%Y, %H:%M')
         now = datetime.datetime.now()
         self.data_cache = dfs
         self.today_cache = today
         self.now_cache = now
     return self.data_cache
Example #20
    def calculate_phrase_sentiment(self, phrases):
        # print "Rating phrases sentiment..."
        valence_list = []
        arousal_list = []
        for p in phrases:
            pol = sentiment(p)[0]
            sent = parse(p, lemmata=True)
            mod = modality(Sentence(sent))
            print(mod)
            valence_list.append(10 * pol)
            arousal_list.append(5 * mod)

        valence = max(valence_list)
        arousal = max(arousal_list)

        print "Valence: " + str(valence)
        print "arousal: " + str(arousal)
        return ((valence, arousal))
Example #21
def team_sentiment_analysis(stats):
	for s in stats.sentences:
		this_sentiment = sentiment(s)
		polarity = float("{0:.2f}".format(this_sentiment[0]))
		subjectivity = float("{0:.2f}".format(this_sentiment[1]))
		polarity_10 = float("{0:.1f}".format(this_sentiment[0]))
		subjectivity_10 = float("{0:.1f}".format(this_sentiment[1]))
		stats.polarity_counts[polarity] += 1
		stats.subjectivity_counts[subjectivity] += 1
		stats.polarity_counts_10s[polarity_10] += 1
		stats.subjectivity_counts_10s[subjectivity_10] += 1

		s = Sentence(parse(s, lemmata=True))
		stats.mood_counts[mood(s)] += 1
		rounded_modality = float("{0:.2f}".format(modality(s)))
		rounded_modality_10 = float("{0:.1f}".format(modality(s)))
		stats.modality_counts[rounded_modality] += 1
		stats.modality_counts_10s[rounded_modality_10] += 1
Example #22
def mood(sentence, **kwargs):
    """Returns IMPERATIVE (command), CONDITIONAL (possibility), SUBJUNCTIVE
    (wish) or INDICATIVE (fact)."""
    if isinstance(sentence, basestring):
        try:
            # A Sentence is expected but a string given.
            # Attempt to parse the string on-the-fly.
            from pattern.en import parse, Sentence
            sentence = Sentence(parse(sentence))
        except ImportError:
            pass
    if imperative(sentence, **kwargs):
        return IMPERATIVE
    if conditional(sentence, **kwargs):
        return CONDITIONAL
    if subjunctive(sentence, **kwargs):
        return SUBJUNCTIVE
    else:
        return INDICATIVE
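A quick usage sketch (assumes pattern.en is installed; mood() also accepts a plain string, which it parses on the fly as shown above):

from pattern.en import mood

print(mood("Do your homework!"))  # 'imperative': starts with an infinitive verb and ends with "!"
print(mood("The sky is blue."))   # 'indicative': a plain statement of fact (the fallback)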
Example #23
 def test_search(self):
     # Assert one match containing all words.
     v = search.Pattern.fromstring("*+")
     v = v.search("one two three")
     self.assertEqual(v[0].string, "one two three")
     # Assert one match for each word.
     v = search.Pattern.fromstring("*")
     v = v.search("one two three")
     self.assertEqual(v[0].string, "one")
     self.assertEqual(v[1].string, "two")
     self.assertEqual(v[2].string, "three")
     # Assert all variations are matched (sentence starts with a NN* which must be caught).
     v = search.Pattern.fromstring("(DT) JJ?+ NN*")
     v = v.search(Sentence(parse("dogs, black cats and a big white rabbit")))
     self.assertEqual(v[0].string, "dogs")
     self.assertEqual(v[1].string, "black cats")
     self.assertEqual(v[2].string, "a big white rabbit")
     v = search.Pattern.fromstring("NN*")
     print "pattern.search.Pattern.search()"
Example #24
def extract(statement):

  s = Sentence(parse(statement, lemmata=True))

  '''c1 = Constraint.fromstring("There be DT")
  c2 = Constraint.fromstring("NN+")
  c3 = Constraint.fromstring("(DT)")
  c4 = Constraint.fromstring("(RB) (JJ) NNP+")
  c5 = Constraint.fromstring("(call) (DT)")
  c6 = Constraint.fromstring("(RB) (JJ) (NNPS|NNP)+")
  p = Pattern(sequence=[c1, c2, c3, c4, c5, c6]) 
 
  match = p.search(s)
   '''
  s = find_entities(s)
   
   # not sure about this "be" thing - happy to match plural (is/are) but not sure about past tense ...
  match = search(MATCH_STRING, s)
  #raise Exception(match)
  return s, match
Example #25
# It does not use modal verbs such as "could" and "would":
# "You could eat your dinner!" is not a command but a bubbly suggestion.

# We can create a pattern that scans for infinitive verbs (VB),
# and use "!" to exclude certain words:
# "!could|!would|!should|!to+ VB" = infinitive not preceded by modal or "to".
# This works fine except in one case: if the sentence starts with a verb.
# So we need a second rule "^VB" to catch this.
# Note that the example below contains a third rule: "^do|VB*".
# This catches all sentences that start with a "do" verb regardless if it is infinitive,
# because the parser sometimes tags infinitive "do" incorrectly.


def imperative(sentence):
    for p in ("!could|!would|!should|!to+ VB", "^VB", "^do|VB*"):
        m = match(p, sentence)
        if m and sentence.string.endswith((".", "!")):  # Exclude questions.
            return True
    return False


for s in ("Just stop it!", "Look out!", "Do your homework!",
          "You should do your homework.", "Could you stop it.",
          "To be, or not to be."):
    s = parse(s)
    s = Sentence(s)
    print(s)
    print(imperative(s))
    print("")
Example #26
print()

# Sentence chunks can be matched by tag (e.g. NP, VP, ADJP).
# The pattern below matches anything from
# "the rabbit gnaws at your fingers" to
# "the white rabbit looks at the carrots":
p = Pattern.fromstring("rabbit VP at NP", s)
m = p.search(s)
print(m)
print()

if m:
    for w in m[0].words:
        print(w, " \t=>", m[0].constraint(w))

print()
print("-------------------------------------------------------------")
# Finally, constraints can also include regular expressions.
# To include them we need to use the full syntax instead of the search()
# function:
import re
r = re.compile(r"[0-9|\.]+")  # all numbers
p = Pattern()
p.sequence.append(Constraint(words=[r]))
p.sequence.append(Constraint(tags=["NN*"]))

s = Sentence(parse("I have 9.5 fingers."))
print(s)
print(p.search(s))
print()
Example #27
# (mail/spam, positive/negative, language, author's age, ...),
# you can predict the type of other "unknown" texts.
# The k-Nearest Neighbor algorithm classifies texts according
# to the k documents that are most similar (cosine similarity) to the given input document.

m = Model()
t = Twitter()

# First, we mine a model of about 1,000 tweets.
# We'll use hashtags as type.
for page in range(1, 10):
    for tweet in t.search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains #win hashtag, we'll set its type to 'WIN':
        s = tweet.text.lower()  # tweet in lowercase
        p = '#win' in s and 'WIN' or 'FAIL'  # document labels
        s = Sentence(parse(s))  # parse tree with part-of-speech tags
        s = search('JJ', s)  # adjectives in the tweet
        s = [match[0].string for match in s]  # adjectives as a list of strings
        s = " ".join(s)  # adjectives as string
        if len(s) > 0:
            m.append(Document(s, type=p, stemmer=None))

# Train k-Nearest Neighbor on the model.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
for document in m:  # (classify unknown documents with the most frequent type).
    classifier.train(document)
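A hypothetical follow-up, not part of the original snippet: once trained, the classifier can label unseen text with classify() (the example text is illustrative).

from pattern.vector import Document
d = Document("what an awesome epic day", stemmer=None)
print(classifier.classify(d))  # 'WIN' or 'FAIL', depending on the adjectives seen during training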
            for word, pos in tag(strSentence):
                if pos in ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"):
                    word = str(lemma(word))

                    if (word not in ("be", "do", "let", "begin", "have", "try",
                                     "start")):
                        verbList.append(word)
                        verbSentList.append(sid.polarity_scores(word))

                        #con.execute("INSERT OR IGNORE INTO verbList VALUES(?, ?)", (lemma(word),0,))
                        #con.execute("UPDATE verbList SET count = count + 1 WHERE verb=?", (lemma(word),))

            a = parse(strSentence, relations=True, lemmata=True)
            #pprint(a)

            sentence = Sentence(a)
            for i in range(0, len(sentence.verbs) - 1):
                strVP = str(' '.join(sentence.verbs[i].lemmata))
                if (strVP not in ("be", "do", "let", "begin", "have", "try",
                                  "start")):
                    vpList.append(strVP)
                    vpSentList.append(sid.polarity_scores(strVP))

            #print(sentence.relations)
            #print(sentence.subjects)
            #print(sentence.objects)
            #print(sentence.verbs)
            #print(sentence.chunk)

            # sqlite3 insert : subject / objects / verbs / CPC / Sentiment
            #   genre, wordCount, filename, sentence
Example #29
def load_text(filename):
    with open(filename) as f:
        lines = [line.strip().split('\t') for line in f][1:]
    return [
        Sentence(format_sentence(sentence))
        for sentence in group_sentences(lines)
    ]
Example #30
from pattern.search import search, Pattern, Constraint
from pattern.en     import Sentence, parse

# This example demonstrates an interesting search pattern that mines for comparisons.
# Notice the use of the constraint "be".
# If the output from the parser includes word lemmas (e.g. "doing" => "do")
# these will also be matched. Using "be" then matches "is", "being", "are", ...
# and if underspecification is used "could be", "will be", "definitely was", ...

p = Pattern.fromstring("NP be (more) ADJP|ADVP than NP")

for s in (
  "the turtle was faster than the hare",
  "Arnold Schwarzenegger is more dangerous than Dolph Lundgren"):
    s = Sentence(parse(s, lemmata=True))  # parse lemmas
    m = p.search(s)
    print(s)
    print()
    print(m)
    print()
    if m:
        print(m[0].constituents())                   # Words grouped by chunk whenever possible.
        print(m[0].constraints(chunk=s.chunks[0]))   # The constraints that match the given chunk.
        print(m[0].constituents(constraint=p[0]))    # Constituents for the given constraint.
        print(m[0].constituents(constraint=[0,3,5])) # Constituents for the given constraint indices.
        print()
        print()
        print()