Esempio n. 1
0
def process(sentence):
    """Answer a greeting sentence with a canned Spanish reply.

    Parses *sentence*, normalizes it to lowercase and, when it matches one
    of the known greeting groups, returns a direct-answer dict with a reply
    picked at random from that group's answer list.

    Args:
        sentence: raw sentence string to classify.

    Returns:
        dict with 'type' and 'message' keys, or None when the sentence is
        not a recognized greeting.
    """
    from random import choice  # stdlib; local import keeps the file header untouched

    t = parsetree(sentence, lemmata=True)
    greeting = t.string.strip().lower()

    # (questions, answers) pairs; the first matching group wins.
    greeting_groups = [
        (["hola", "buenas"],
         ["hola", "buenas"]),
        (["que tal", "como estas", u"cómo estás", "como va",
          "como te encuentras", "va todo bien"],
         ["Estoy bien, gracias por preguntar"]),
        (["buenos dias", "buenas tardes", "buenas noches"],
         ["hola", "buenas"]),
    ]

    for questions, answers in greeting_groups:
        if greeting in questions:
            # choice(answers) is equivalent to answers[randint(0, len-1)]
            return {
                'type': 'direct_answer',
                'message': choice(answers),
            }

    return None
Esempio n. 2
0
def process(sentence):
    """Extract a (question, relation, object) query from a Spanish sentence.

    Matches "que {VP} {NP}" and "como {VP} {NP}" against the parsed
    sentence; on the first hit, builds a query dict from the matched
    verb phrase and noun phrase.

    Args:
        sentence: raw sentence string to parse and match.

    Returns:
        dict with keys 'type', 'question', 'relation', 'gender', 'object',
        or None when neither pattern matches.
    """
    t = parsetree(sentence, lemmata=True)

    # Both branches built the same dict shape; only the question word differs.
    for question in ('que', 'como'):
        m = pattern_utils.pattern_match("%s {VP} {NP}" % question, t)
        if m:
            n, g, noun = pattern_utils.parse_NP(m.group(2))
            return {
                'type': 'query',
                'question': question,
                'relation': conjugate(m.group(1).string, INFINITIVE),
                'gender': g,
                'object': noun,
            }

    return None
Esempio n. 3
0
def nnps_and_keywords(text):
    """Map proper-noun groups of each sentence to its plain keywords.

    For every sentence of *text*, collects hyphen-joined NNP phrases among
    the sentence's keywords and the top-5 non-NNP keywords (lemma when
    available), then accumulates the keyword sets keyed by the tuple of
    proper nouns.
    """
    parsed = parsetree(text, relations=True, lemmata=True)
    result = {}

    for sentence in parsed:
        doc = Document(sentence)

        # Hyphen-join the NNP words of each proper-noun keyword's phrase.
        proper_nouns = set()
        for item in doc.keywords():
            word = item[1]
            if word.type == 'NNP':
                parts = [w.string for w in word.phrase.words if w.type == 'NNP']
                proper_nouns.add("-".join(parts))

        # Top non-proper keywords, preferring the lemma when present.
        plain = set()
        for item in doc.keywords(top=5):
            word = item[1]
            if word.type != 'NNP':
                plain.add(word.lemma if word.lemma else word.string)

        # Only sentences with at least two of each are kept.
        if len(proper_nouns) > 1 and len(plain) > 1:
            key = tuple(proper_nouns)
            if key in result:
                result[key].update(plain)
            else:
                result[key] = plain

    return result
Esempio n. 4
0
 def __call__(self, text, default=None):
     """Collect truthy entity results from every sentence of *text*.

     *default* is accepted for interface compatibility but is not read
     in this body. Returns the flat list of non-empty entities, or None
     when nothing was found.
     """
     found = []
     for sentence in parsetree(text):
         found.extend(self.entity(sentence))
     found = [entity for entity in found if entity]
     return found if found else None
Esempio n. 5
0
def lemma_esp(text):
    """Lemmatize *text* when it is Spanish; return it unchanged otherwise.

    The text is decoded from UTF-8 and transliterated to ASCII first.
    When language identification fails for any reason, the original
    best-effort behavior is kept: lemmatize anyway.

    Args:
        text: UTF-8 encoded bytes (or str) to process.

    Returns:
        The lemmatized (or original) text as a string.
    """
    def _lemmatize(t):
        # Join the lemma of every word of the first sentence.
        s = parsetree(t, lemmata=True)
        lista_palabras = s.sentences[0].words
        return ' '.join(map(lambda x: x.lemma, lista_palabras))

    text = codecs.decode(text, "utf-8")
    text = unidecode(text)
    try:
        is_spanish = langid.classify(text.encode('utf-8').decode('utf-8'))[0] == "es"
    except Exception:  # narrowed from a bare except; keep the lemmatize fallback
        return _lemmatize(text)
    return _lemmatize(text) if is_spanish else text
Esempio n. 6
0
    def parse(self, text):
        """Classify every word of *text*, sentence by sentence.

        Returns a list of dicts, one per sentence, each carrying the
        classified words and the sentence's original string.
        """
        tree = parsetree(text, relations=True, lemmata=True)
        parsed = []
        for sent in tree:
            labeled = [self.classifier.classify(word) for word in sent.words]
            parsed.append({"words": labeled, "string": sent.string})
        return parsed
Esempio n. 7
0
def pattern_match(pattern, sentence):
    """Match a pattern string against a sentence, parsing it first if needed.

    Args:
        pattern: pattern DSL string (e.g. "que {VP} {NP}").
        sentence: an already-parsed Text, or a raw string that will be
            parsed with lemmata.

    Returns:
        The match object, or None when matching raises for any reason.
    """
    if type(sentence) is not Text:
        sentence = parsetree(sentence, lemmata=True)

    p = Pattern.fromstring(pattern)
    try:
        return p.match(sentence)
    except Exception:  # narrowed from a bare except; a match error means "no match"
        return None
Esempio n. 8
0
    def parse(self, text):
        """Parse *text* and extract its sources, reporters and entities."""
        # Reset the results of any previous run.
        self.__sources = []
        self.__reporters = []
        self.__entities = []

        cleaned = self._clean(text)

        # POS-tag the cleaned text with relations and lemmas.
        self.__tree = parsetree(cleaned, relations=True, lemmata=True)

        # Run the extraction passes over the tagged tree.
        self._extract_sources()
        self._extract_reporters()
Esempio n. 9
0
def verbosInfinitivos(cadena):
    """Return (verb, infinitive) pairs for every verb found in *cadena*."""
    arbol = parsetree(cadena)
    coincidencias = search('VB*', arbol)
    return [(m.string, conjugate(m.string, INFINITIVE)) for m in coincidencias]
Esempio n. 10
0
def verbosInfinitivos(cadena):
    """Print the matched verbs of *cadena* and return (verb, infinitive) pairs."""
    arbol = parsetree(cadena)
    verbos = search('VB*', arbol)
    print('verbos =', verbos)  # original debug trace, kept as-is
    return [(m.string, conjugate(m.string, INFINITIVE)) for m in verbos]
Esempio n. 11
0
def compute_topics(set_reduce_topics, today):
    """Group today's scraped news sentences into stored topic groups.

    Splits every scraped site's content into sentences, keeps only the
    noun lemmas of each, clusters near-duplicate documents by cosine-like
    similarity, and persists tag groups (top-3 tokens) whose matching
    links number more than 3.

    Args:
        set_reduce_topics: when truthy, also run reduce_topics() at the end.
        today: date-like object providing .day, .month and .year.

    Returns:
        True (always; results are persisted via ScrapedTopicGroups).
    """
    # Based on similarity
    # Based on words
    cleanup_topic(today.day, today.month, today.year)
    ScrapedTopicGroups.sync()
    sites = SiteNewsScrapedData.objects.all()
    documents = []
    # One parsed sentence per non-empty '.'-separated fragment.
    for site in sites:
        for sentence in site.content.split('.'):
            if sentence:
                tree = parsetree(sentence, lemmata=True)
                if len(tree) > 0:
                    documents.append(tree[0])

    # Keep only noun lemmas that are not stop words.
    # NOTE(review): startswith(u'NN') already matches the other three prefixes.
    documents = [[w.lemma for w in document if
                  w.tag.startswith((u'NN', u'NNS', u'NNP', u'NNPS')) and w.lemma not in settings.STOP_WORDS] for
                 document in documents]

    documents = [Document(" ".join(document) + '.') for document in documents if len(document) > 1]
    model = Model_Comp(documents=documents)

    # format: (distribution, Document)
    documents_analyzed = []
    for document in documents:
        tokens = []
        similar_items_news = model.nearest_neighbors(document)
        # Merge the words of documents more than 95% similar, once each
        # (assumes Python 2 dict.iteritems on Document.words).
        for similarity, sim_document in similar_items_news:
            if similarity > 0.95 and sim_document.id not in documents_analyzed:
                tokens.extend([word for word, _ in sim_document.words.iteritems()])
                documents_analyzed.append(sim_document.id)
        # Added is there some document similar
        if document.id not in documents_analyzed:
            tokens.extend([word for word, _ in document.words.iteritems()])
            documents_analyzed.append(document.id)
        # filter the most relevant words (based on count)
        counter = defaultdict(int)
        for token in tokens:
            counter[token] += 1
        # Order counter desc
        tokens_org = sorted(counter.items(), key=lambda element: element[1], reverse=True)
        tokens = [token for token, count in tokens_org[:3]]
        if tokens and len(tokens) > 0:
            links = SiteNewsScrapedData.find_coincidences(tokens)
            # Only keep topic groups backed by more than 3 links.
            if len(links) > 3:
                ScrapedTopicGroups.create(tags=tokens, links=links, relevance=len(links),
                                        day=today.day, month=today.month, year=today.year)
    if set_reduce_topics:
        reduce_topics(today.day, today.month, today.year)
    return True
def interactive_loader(file_name):
  """Interactively label word polarity/modifiers from a text file (Python 2).

  Reads *file_name*, walks its words in groups of GROUP_SIZE and prompts
  the operator to classify each word that has no polarity or modifier yet.
  Answers are persisted via DWords.insert_word and save_in_file; entering
  "d" discards a word.
  """
  GROUP_SIZE = 3

  text = file(file_name).read()

  # Tokenize only; POS tags and chunks are not needed here.
  words = parsetree(text, tags=False, chunks=False).words

  # zip(*[iter(words)]*N) yields consecutive N-word groups.
  for word_group in zip(*[iter(words)]*GROUP_SIZE):

    options = ""

    d_words = []

    for word in word_group:
      # Reuse a stored word when present; otherwise start a fresh one.
      d_word = DWords.find_word(word.string.lower()) or DWord(word.string.lower())
      if not (d_word.has_polarity() or d_word.is_modifier()):
        d_words.append(d_word)

    if len(d_words) == 0:
      continue


    # Re-prompt until exactly one option character per pending word.
    while len(options)!=len(d_words):
      print "\t".join(map((lambda w: w.word), d_words))
      options = list(raw_input("0:neutral\t1:positive\t2: negative\t3:inversor\t5:minimizer\t6:maximizer\td:descartar\n")[:len(d_words)])

    for d_word in d_words:
      option = options.pop(0)
      if option=="0":
        d_word.polarity = 0
      elif option=="1":
        d_word.polarity = 1
      elif option=="2":
        d_word.polarity = -1
      elif option == "3":
        d_word.modifier = -1
      elif option == "5":
        d_word.modifier = 0.5
      elif option == "6":
        d_word.modifier = 2

      # "d" discards the word; anything else is stored.
      if option!="d":
        DWords.insert_word(d_word)
        save_in_file(d_word)
    print "\n\n\n"
 def getLemmas(self, words):
     """Return the first lemma of the parse of each entry in *words*."""
     return [parsetree(word, lemmata=True)[0].lemma[0] for word in words]
Esempio n. 14
0
from pattern.es import parsetree

# Parse a sample sentence, then dump the tree, every sentence,
# every chunk and every word it contains.
s = parsetree('The mobile web is more important than mobile apps.')
print(s)
for sentence in s:
    print(sentence)
    for chunk in sentence.chunks:
        print(chunk)
        for word in chunk.words:
            print(word)
Esempio n. 15
0
def verbosInfinitivos(cadena):
    """Print the 'VB' matches found in *cadena*; returns None."""
    # Computed but never read below; the limpiar_str call is kept as-is.
    lis = limpiar_str(cadena).split(' ')
    arbol = parsetree(cadena)
    coincidencias = search('VB', arbol)
    print('Verbos :', coincidencias)
Esempio n. 16
0
# coding: utf-8
import argparse
from pattern.es import parsetree
from pattern.vector import Document
import json
from operator import itemgetter
from itertools import groupby

from pprint import pprint

# CLI: require a text file whose sentences will be parsed and printed.
parser = argparse.ArgumentParser(description='Find character names in text blobs. Create graph.')

parser.add_argument('--text', type=argparse.FileType('r'), required=True, help='find names here')

args   = parser.parse_args()


# Parse the whole input with grammatical relations and lemmas.
s = parsetree(args.text.read(), relations=True, lemmata=True)


# Print every sentence with its index (Python 2 print statement).
for i in range(len(s)):
    sentence = s[i]
    print "[%s]"%i, s[i].string.encode('utf8')

Esempio n. 17
0
                    type=argparse.FileType('w'),
                    required=True,
                    help='pickle to output graph')

args = parser.parse_args()

# Strip Spanish accents and uppercase every surname read from the
# --names files.
last_names = []
for f in args.names:
    for name in f.readlines():
        last_names.append(
            name.replace("Á", 'A').replace("á", 'a').replace("É", 'E').replace(
                "é", 'e').replace("Í", 'I').replace("í", 'i').replace(
                    "Ó", 'O').replace("ó", 'o').replace("Ú", 'U').replace(
                        "ú", 'u').upper().strip())

# Parse the input text with relations and lemmas.
s = parsetree(args.text.read(), relations=True, lemmata=True)


def names_from_dict(nis):
    """Group consecutive word indexes in *nis* into capitalized names.

    NOTE(review): Python 2 code (tuple-parameter lambda, list .keys());
    this definition appears truncated here — no return statement is
    visible in this span.
    """
    names_in_sentence = nis.copy()
    indexes = names_in_sentence.keys()
    indexes.sort()

    names = []
    # Consecutive indexes share the same (i - x) key, so each groupby
    # group is a run of adjacent words forming one name.
    for k, g in groupby(enumerate(indexes), lambda (i, x): i - x):
        name = []
        for i in map(itemgetter(1), g):
            name.append(names_in_sentence[i])
        names.append(" ".join([n.capitalize() for n in name]))
Esempio n. 18
0
def stem_lemma(word):
    """Lemmatize *word* with the parser, then stem the resulting lemma."""
    lemma = parsetree(word, lemmata=True)[0].lemmata[0]
    return stemmer.stem(lemma)
Esempio n. 19
0
# coding: utf-8

from pattern.es import parsetree

theogony = open('data/narco/SresNarco/narco.txt').read()

s = parsetree(theogony, relations=True, lemmata=True)

for e in s:
    try:
        for v in e.verbs:
            if v.subject and v.object:
                subjects = []
                for w in v.subject:
                    if w.type == 'NNP':
                        subjects.append(w.string)
                objects = []
                for w in v.object:
                    if w.type == 'NNP':
                        objects.append(w.string)

                if objects and subjects:
                    print subjects, v.lemmata, objects

    except:
        pass
Esempio n. 20
0
def Word_list_to_Text(Word_list):
    """Re-parse a list of Word objects into a parsed Text.

    Each word's string is followed by a space (preserving the original's
    trailing-space behavior) before feeding the result back to parsetree.
    Uses str.join instead of quadratic += concatenation.
    """
    string = ''.join(w.string + ' ' for w in Word_list)
    return parsetree(string)
Esempio n. 21
0
# coding: utf-8

from pattern.es import parsetree


theogony = open('data/narco/SresNarco/narco.txt').read()




s = parsetree(theogony, relations=True, lemmata=True)

for e in s:
    try:
        for v in e.verbs:
            if v.subject and v.object:
                subjects = []
                for w in v.subject:
                    if w.type == 'NNP':
                        subjects.append( w.string )
                objects = []
                for w in v.object:
                    if w.type == 'NNP':
                        objects.append( w.string )

                if objects and subjects:
                    print subjects, v.lemmata, objects
                        
    except:
        pass