Exemple #1
0
_SPACY_MODEL_CACHE = {}


def _load_spacy_model(name):
    """Return the spaCy pipeline called *name*, loading it at most once."""
    if name not in _SPACY_MODEL_CACHE:
        _SPACY_MODEL_CACHE[name] = spacy.load(name)
    return _SPACY_MODEL_CACHE[name]


def get_entities_from_sent(f1):
    """Group the named entities that spaCy finds in *f1* by entity label.

    The text is run through two pipelines:

    * ``pt_core_news_sm`` - every entity it reports is kept.
    * ``en_core_web_sm`` - only entities labelled TIME, ORDINAL, DATE,
      CARDINAL, MONEY or QUANTITY are kept.

    Args:
        f1: The sentence (text) to analyse.

    Returns:
        A dict mapping each label to the list of entity texts carrying that
        label, in the order they were encountered.
    """
    # Labels accepted from the 'en_core_web_sm' pipeline.
    english_labels = {
        'TIME', 'ORDINAL', 'DATE', 'CARDINAL', 'MONEY', 'QUANTITY'
    }
    entities = {}

    # Pipelines are cached: loading a model from disk on every call is
    # costly and the loaded pipeline can be reused across sentences.
    for ent in _load_spacy_model('pt_core_news_sm')(f1).ents:
        entities.setdefault(str(ent.label_), []).append(str(ent))

    for ent in _load_spacy_model('en_core_web_sm')(f1).ents:
        label = str(ent.label_)
        if label in english_labels:
            entities.setdefault(label, []).append(str(ent))

    return entities
Exemple #2
0
def get_word_entitie_from_sent(f1):
    """Return each entity of *f1* as a ``"<entity text> <label>"`` string.

    The sentence is parsed with the shared ``nlp`` pipeline and the entities
    are listed in the order the pipeline reports them.
    """
    return [
        str(entity) + " " + str(entity.label_)
        for entity in nlp(f1).ents
    ]
Exemple #3
0
def get_syntactic_dependency_from_sent(f1):
    """List the dependency relation of every token in *f1*.

    Returns:
        One ``(head text, token text, dependency label)`` tuple per token,
        in token order, as produced by the shared ``nlp`` pipeline.
    """
    return [(tok.head.text, tok.text, tok.dep_) for tok in nlp(f1)]
Exemple #4
0
def get_obj_pos_from_sent(f1):
    """Map part-of-speech tags to a lower-cased word taken from *f1*.

    Every tag in the fixed tag set starts out with an empty list. For each
    token, the entry for its tag is replaced by a one-element list holding
    the token's lower-cased text, so a tag that occurs several times ends up
    with only its last word.

    NOTE(review): the pre-filled lists suggest words were meant to
    accumulate per tag, but the accumulation was disabled in the original
    code - confirm which behaviour callers expect.
    """
    tag_names = (
        "ADJ", "ADP", "ADV", "AUX", "CONJ", "CCONJ", "DET", "INTJ", "NOUN",
        "NUM", "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X",
    )
    pos = {tag: [] for tag in tag_names}

    for token in nlp(f1):
        word = str(token.orth_).lower()
        # Overwrites whatever was stored for this tag before.
        set_(pos, str(token.pos_), [word])

    return pos
Exemple #5
0
def process_adverb(adverbs, f1, f2):
    """Compare which of the given adverbs appear in two sentences.

    Matching is case-insensitive and uses a plain substring test, so an
    adverb that happens to be contained in a longer word also counts.

    Args:
        adverbs: Iterable of adverb strings to look for.
        f1: First sentence.
        f2: Second sentence.

    Returns:
        A four-element list: the number of adverbs found in ``f1``, the
        number found in ``f2``, the absolute difference between those two
        counts, and ``len_intersection`` of the two lists of found adverbs.
    """
    f1 = f1.lower()
    f2 = f2.lower()
    # Lower-case once up front; this also lets ``adverbs`` be a one-shot
    # iterable. The sentence is no longer parsed with spaCy here because
    # the parse result was never used.
    lowered = [item.lower() for item in adverbs]

    adv1 = [item for item in lowered if item in f1]
    adv2 = [item for item in lowered if item in f2]
    len1 = len(adv1)
    len2 = len(adv2)

    return [len1, len2, abs(len1 - len2), len_intersection(adv1, adv2)]
Exemple #6
0
def chooseSong(song_data, max_recent=7):
    """Choose a song to tweet and update the recently chosen songs.

    A song is picked at random among those whose id is not in
    ``song_data['recentSongIds']``. When every song is already in the recent
    list, the pick falls back to all songs instead of failing.

    Args:
        song_data: Dict with a ``'songs'`` mapping (song id -> song dict
            holding a ``'playCount'``) and a ``'recentSongIds'`` list. Both
            the chosen song and the recent list are updated in place.
        max_recent: Maximum number of ids kept in the recent list.

    Returns:
        The chosen song dict, with its ``'playCount'`` already incremented.

    Raises:
        IndexError: If ``song_data['songs']`` is empty.
    """
    import random  # local import: nothing else in this block needs it

    recent = song_data['recentSongIds']
    songs = song_data['songs']

    # Prefer songs that were not chosen recently.
    candidates = [song_id for song_id in songs if song_id not in recent]
    if not candidates:
        candidates = list(songs)
    song_id = random.choice(candidates)

    song = songs[song_id]
    song['playCount'] += 1

    # Only relevant for the fallback pick: avoid listing an id twice.
    if song_id in recent:
        recent.remove(song_id)

    # Drop the least recently chosen ids so that, after appending the new
    # one, the history holds at most ``max_recent`` entries - even if it
    # was already longer than that on entry.
    overflow = len(recent) - max_recent + 1
    if overflow > 0:
        del recent[:overflow]
    recent.append(song_id)

    return song
Exemple #7
0
def get_word_pos_from_2_sents(f1, f2):
    """Return the ``"<word> <POS>"`` strings of two sentences.

    Each sentence is parsed with the shared ``nlp`` pipeline. Tokens tagged
    PUNCT or SPACE are skipped; every other token contributes its
    lower-cased, deburred text followed by a space and its POS tag.

    Returns:
        A ``(s1, s2)`` tuple with one list per sentence.
    """
    ignored_tags = ("PUNCT", "SPACE")

    def tag_words(sentence):
        tagged = []
        for tok in nlp(sentence):
            if tok.pos_ in ignored_tags:
                continue
            push(tagged, deburr(tok.text.lower()) + " " + tok.pos_)
        return tagged

    s1 = tag_words(f1)
    s2 = tag_words(f2)
    return s1, s2
Exemple #8
0
    def datetime_clusterer(tuplet: Tuple[List[Dict], int],
                           message: Dict) -> Tuple[List, int]:
        """Fold one message into the running list of pulse clusters.

        The signature (accumulator, item) looks like a reduce callback -
        confirm against the caller. ``interval`` is read from the enclosing
        scope.

        Args:
            tuplet: ``(pulse_clusters, max_pulse_rate)`` accumulated so far,
                or a falsy value when no cluster exists yet. The cluster
                list is modified in place.
            message: Dict whose ``'timestamp'`` is rounded with
                ``round_to_nearest_interval_minutes`` to find its cluster.

        Returns:
            The updated ``(pulse_clusters, max_pulse_rate)`` pair. Each
            cluster is a dict with ``'rate'`` and ``'time'`` keys.
        """
        message_anchored_time = round_to_nearest_interval_minutes(
            message.get('timestamp'))

        # No cluster yet: start the first one.
        if not tuplet:
            return [{'rate': 1, 'time': message_anchored_time}], 1

        pulse_clusters, max_pulse_rate = tuplet
        latest_pulse = pulse_clusters.pop()

        # The message belongs to the latest cluster: bump its rate.
        if latest_pulse.get('time') == message_anchored_time:
            new_pulse_rate = latest_pulse.get('rate') + 1
            new_pulse_clusters = _.push(
                pulse_clusters,
                _.assign(latest_pulse, {'rate': new_pulse_rate}))
            return new_pulse_clusters, max(max_pulse_rate, new_pulse_rate)

        # The message starts a new cluster: put the latest cluster back,
        # then add the zero-rate clusters for the gap, then the new cluster.
        # The gap fillers must come AFTER the existing clusters so that
        # they sit between the latest cluster and the new one.
        filled_clusters = _.concat(
            _.push(pulse_clusters, latest_pulse),
            zero_pulses(start_time=latest_pulse.get('time'),
                        end_time=message_anchored_time,
                        interval=interval))

        new_cluster = {'rate': 1, 'time': message_anchored_time}
        return _.push(filled_clusters, new_cluster), max_pulse_rate
Exemple #9
0
def zero_pulses(start_time: datetime, end_time: datetime,
                interval: int) -> List[Dict]:
    """Build the zero-rate pulses strictly between two cluster times.

    Args:
        start_time: Time of the earlier cluster (excluded from the result).
        end_time: Time of the later cluster (excluded from the result).
        interval: Width of one cluster in minutes; must be non-zero.

    Returns:
        One ``{'rate': 0, 'time': ...}`` dict for every interval step that
        lies after ``start_time`` and before ``end_time``, in chronological
        order. Empty when the two times are less than two intervals apart
        or when ``end_time`` precedes ``start_time``.
    """
    # total_seconds() covers the whole span; the ``seconds`` attribute
    # alone would silently drop whole days from a multi-day gap.
    elapsed_minutes = int((end_time - start_time).total_seconds()) // 60
    missing_intervals = (elapsed_minutes // interval) - 1

    # An empty range (missing_intervals <= 0) yields an empty list.
    return [
        {'rate': 0, 'time': start_time + timedelta(minutes=interval * step)}
        for step in range(1, missing_intervals + 1)
    ]
Exemple #10
0
def test_push(case, expected):
    """Check that ``_.push`` called with the unpacked *case* gives *expected*."""
    result = _.push(*case)
    assert result == expected
Exemple #11
0
    def remove_nones(pulses: List[Union[List[Dict], None]],
                     pulse: Union[List[Dict], None]) -> List[List[Dict]]:
        """Append *pulse* to *pulses* unless it is ``None``.

        Returns ``pulses`` untouched for a ``None`` pulse, otherwise the
        result of ``_.push(pulses, pulse)``.
        """
        return pulses if pulse is None else _.push(pulses, pulse)
Exemple #12
0
def get_sinonimos(palavra):
    """Return the synonyms stored for *palavra*, scraping them on a miss.

    The word is normalised with ``kebab_case`` and ``deburr`` first. Lookup
    order:

    1. If ``sinonimos`` already has the word, its entry is returned.
    2. If the word is listed under ``nao_consta["sinonimos"]``, ``[]`` is
       returned.
    3. Otherwise the page ``https://www.sinonimos.com.br/<word>`` is fetched
       and parsed. Each ``s-wrapper`` block becomes one entry stored under
       ``"<word>.<sense>"`` in ``sinonimos``, which is then saved with
       ``save_json``.

    When the page has no ``h1``, reports "Página Não Encontrada", or has no
    usable content block, the word is added to ``nao_consta["sinonimos"]``,
    that structure is saved and ``[]`` is returned. An error while reading
    an individual block aborts with ``[]`` without saving.
    """
    palavra = deburr(kebab_case(palavra))
    known_missing = get(nao_consta, "sinonimos")

    def register_missing():
        # Remember that no synonyms exist for this word, persist that
        # knowledge, and hand back the empty result.
        updated = known_missing
        if palavra is not None:
            updated = push(known_missing, palavra)
        set_(nao_consta, "sinonimos", updated)
        save_json("nao_consta", nao_consta)
        return []

    if has(sinonimos, palavra):
        return get(sinonimos, palavra)
    if palavra in known_missing:
        return []

    url = "https://www.sinonimos.com.br/" + palavra
    # NOTE(review): retries forever, with no delay, until a request succeeds.
    while True:
        try:
            site = requests.get(url)
        except Exception as e:
            print(e)
        else:
            break
    data = BeautifulSoup(site.content, 'html.parser')

    try:
        h1 = data.find('h1').getText()
    except Exception:
        return register_missing()

    if h1 == "Página Não Encontrada":
        return register_missing()

    content = data.find('div', attrs={'id': 'content'})
    try:
        wrappers = content.findAll('div', attrs={'class': 's-wrapper'})
    except Exception as e:
        print(e)
        return register_missing()

    for position, wrapper in enumerate(wrappers, start=1):
        try:
            try:
                heading = wrapper.find('div', attrs={'class': 'sentido'})
                key = lower_case(heading.getText().strip(":"))
            except Exception as e:
                # No usable sense heading: fall back to a numbered key.
                print(e)
                key = "sinonimos" + str(position)
            links = wrapper.findAll('a',
                                    attrs={'class': 'sinonimo'},
                                    text=True)
            spans = wrapper.findAll('span')
            lista_sinonimos = [
                node.getText().strip(":") for node in union(links, spans)
            ]
            set_(sinonimos, palavra + "." + key, lista_sinonimos)
            print("Sinônimo Salv@ no Dicionário")
        except Exception as e:
            print("\nError:\n" + str(e))
            return []

    save_json("sinonimos", sinonimos)
    return get(sinonimos, palavra)
Exemple #13
0
def get_antonimos(palavra):
    """Return the antonyms stored for *palavra*, scraping them on a miss.

    The word is normalised with ``kebab_case`` and ``deburr`` first. Lookup
    order:

    1. If ``antonimos`` already has the word, its entry is returned.
    2. If the word is listed under ``nao_consta["antonimos"]``, ``[]`` is
       returned.
    3. Otherwise the page ``https://www.antonimos.com.br/<word>`` is fetched
       and parsed. Each ``s-wrapper`` block becomes one entry stored under
       ``"<word>.<sense>"`` in ``antonimos``, which is then saved with
       ``save_json``.

    When the page has no ``h1``, reports "Página Não Encontrada", or has no
    usable content block, the word is added to ``nao_consta["antonimos"]``,
    that structure is saved and ``[]`` is returned. An error while reading
    an individual block aborts with ``[]`` without saving.
    """
    palavra = deburr(kebab_case(palavra))
    known_missing = get(nao_consta, "antonimos")

    def register_missing():
        # Remember that no antonyms exist for this word, persist that
        # knowledge, and hand back the empty result.
        updated = known_missing
        if palavra is not None:
            updated = push(known_missing, palavra)
        set_(nao_consta, "antonimos", updated)
        save_json("nao_consta", nao_consta)
        return []

    if has(antonimos, palavra):
        return get(antonimos, palavra)
    if palavra in known_missing:
        return []

    url = "https://www.antonimos.com.br/" + palavra
    # NOTE(review): retries forever, with no delay, until a request succeeds.
    while True:
        try:
            site = requests.get(url)
        except Exception as e:
            print(e)
        else:
            break
    data = BeautifulSoup(site.content, 'html.parser')

    try:
        h1 = data.find('h1').getText()
    except Exception:
        return register_missing()

    if h1 == "Página Não Encontrada":
        return register_missing()

    content = data.find('div', attrs={'id': 'content'})
    try:
        wrappers = content.findAll('div', attrs={'class': 's-wrapper'})
    except Exception as e:
        print(e)
        # Stop here: without this return the loop below would run with the
        # wrapper list never assigned and fail with an unbound-name error.
        return register_missing()

    for wrapper in wrappers:
        try:
            try:
                heading = wrapper.find('div', attrs={'class': 'sentido'})
                key = lower_case(heading.getText().strip(":"))
            except Exception:
                # No sense heading: use the block's <strong> text instead.
                key = lower_case(
                    wrapper.find('strong').getText().strip("."))
            # The first three characters of the paragraph are skipped;
            # presumably a numbering prefix - TODO confirm.
            p = wrapper.find('p', attrs={
                'class': 'antonimos'
            }).getText()[3:]
            try:
                p = str(p.encode('raw_unicode_escape').decode('utf-8'))
            except Exception as e:
                # Keep the text as it is when it cannot be re-decoded.
                print(e)
            lista_antonimos = [
                value.strip(":").strip(' ') for value in p.split(',')
            ]
            set_(antonimos, palavra + "." + key, lista_antonimos)
            print("Antônimo Salv@ no Dicionário")
        except Exception as e:
            print("\nError:\n" + str(e))
            return []

    save_json("antonimos", antonimos)
    return get(antonimos, palavra)