def get_entities_from_sent(f1):
    """Extract named entities from *f1*, grouped by entity label.

    Runs the Portuguese spaCy model and collects every entity, then runs
    the English model and collects only numeric/temporal labels (TIME,
    ORDINAL, DATE, CARDINAL, MONEY, QUANTITY).

    Args:
        f1: sentence text to analyze.

    Returns:
        dict mapping entity label (str) to a list of entity surface strings.
    """
    # NOTE(review): loading both spaCy models on every call is expensive;
    # consider caching them at module level.
    entities = {}
    # labels taken from the English model only (pt model handles the rest)
    numeric_labels = {'TIME', 'ORDINAL', 'DATE', 'CARDINAL', 'MONEY', 'QUANTITY'}

    doc = spacy.load('pt_core_news_sm')(f1)
    for ent in doc.ents:
        # setdefault replaces the original get/push/set_ accumulate dance
        entities.setdefault(str(ent.label_), []).append(str(ent))

    doc = spacy.load('en_core_web_sm')(f1)
    for ent in doc.ents:
        if str(ent.label_) in numeric_labels:
            entities.setdefault(str(ent.label_), []).append(str(ent))
    return entities
def get_word_entitie_from_sent(f1):
    """Return "<entity> <label>" strings for each named entity in *f1*.

    Uses the module-level ``nlp`` spaCy pipeline.

    Args:
        f1: sentence text to analyze.

    Returns:
        list of "entity LABEL" strings, one per entity, in document order.
    """
    doc = nlp(f1)
    # comprehension replaces the push loop; also avoids shadowing the
    # built-in ``tuple`` (the original bound it as a local name)
    return [str(ent) + " " + str(ent.label_) for ent in doc.ents]
def get_syntactic_dependency_from_sent(f1):
    """Return (head_text, token_text, dep_label) triples for every token in *f1*.

    Uses the module-level ``nlp`` spaCy pipeline.
    """
    doc = nlp(f1)
    # comprehension replaces the manual push loop
    return [(token.head.text, token.text, token.dep_) for token in doc]
def get_obj_pos_from_sent(f1):
    """Group the lower-cased tokens of *f1* by part-of-speech tag.

    Uses the module-level ``nlp`` spaCy pipeline.

    Returns:
        dict mapping Universal POS tag -> list of lower-cased token texts.
        All listed tags are present as keys even when their list is empty.
    """
    doc = nlp(f1)
    pos = {
        "ADJ": [], "ADP": [], "ADV": [], "AUX": [], "CONJ": [], "CCONJ": [],
        "DET": [], "INTJ": [], "NOUN": [], "NUM": [], "PART": [], "PRON": [],
        "PROPN": [], "PUNCT": [], "SCONJ": [], "SYM": [], "VERB": [], "X": []
    }
    for token in doc:
        # BUG FIX: the original rebuilt an empty list for every token (the
        # accumulating ``get`` call was commented out), so each POS key kept
        # only the LAST matching token. Accumulate instead; setdefault also
        # tolerates tags missing from the literal above (e.g. SPACE).
        pos.setdefault(str(token.pos_), []).append(str(token.orth_).lower())
    return pos
def process_adverb(adverbs, f1, f2):
    """Count which adverbs occur in each of two sentences and compare.

    Args:
        adverbs: iterable of adverb strings.
        f1, f2: the two sentences (matching is case-insensitive).

    Returns:
        [count_in_f1, count_in_f2, abs(count difference),
         size of the intersection of the matched adverb lists].
    """
    f1 = f1.lower()
    f2 = f2.lower()
    adv1 = []
    adv2 = []
    # Removed unused ``doc = nlp(f1)`` (expensive, result never read) and
    # the unused ``dependencies`` list from the original.
    for item in adverbs:
        item = item.lower()
        # NOTE: substring containment, not token matching — "on" would
        # match inside "monday"; preserved from the original.
        if item in f1:
            adv1.append(str(item))
        if item in f2:
            adv2.append(str(item))
    # len(advN) equals the original lenN counters, which were incremented
    # in lockstep with each push
    return [len(adv1), len(adv2), abs(len(adv1) - len(adv2)),
            len_intersection(adv1, adv2)]
def chooseSong(song_data):
    """ chooses a song to tweet and updates recently choosen songs """
    # stdlib replaces pydash: filter_ -> comprehension, sample -> choice,
    # shift -> pop(0), push -> append (all mutation semantics preserved)
    import random

    recent = song_data['recentSongIds']
    songs = song_data['songs']

    # filter out recently choosen songs and randomly choose a song
    filtered_song_ids = [sid for sid in songs if sid not in recent]
    song_id = random.choice(filtered_song_ids)

    # get chosen song and increment play count
    song = songs[song_id]
    song['playCount'] += 1

    # pop least recently choosen song and push new one (ring of 7)
    if len(recent) == 7:
        recent.pop(0)
    recent.append(song_id)

    return song
def get_word_pos_from_2_sents(f1, f2):
    """Return the "<word> <POS>" token list for each of two sentences.

    Punctuation and whitespace tokens are skipped; words are lower-cased
    and deburred (diacritics stripped). Uses the module-level ``nlp``
    spaCy pipeline.

    Returns:
        (list_for_f1, list_for_f2)
    """
    def _word_pos(sent):
        # one "<deburred-lower-word> <POS>" entry per content token;
        # extracted because the original duplicated this loop verbatim
        pairs = []
        for token in nlp(sent):
            if token.pos_ == "PUNCT" or token.pos_ == "SPACE":
                continue
            pairs.append(deburr(token.text.lower()) + " " + token.pos_)
        return pairs

    return _word_pos(f1), _word_pos(f2)
def datetime_clusterer(tuplet: Tuple[List[Dict], int], message: Dict) -> Tuple[List, int]: message_anchored_time = round_to_nearest_interval_minutes( message.get('timestamp')) # No cluster: create a cluster if not tuplet: return [{'rate': 1, 'time': message_anchored_time}], 1 pulse_clusters, max_pulse_rate = tuplet latest_pulse = pulse_clusters.pop() # Message fits in cluster - cluster it up! if latest_pulse.get('time') == message_anchored_time: new_pulse_rate = latest_pulse.get('rate') + 1 new_pulse_clusters = _.push( pulse_clusters, _.assign(latest_pulse, {'rate': new_pulse_rate})) if max_pulse_rate >= new_pulse_rate: return new_pulse_clusters, max_pulse_rate return new_pulse_clusters, new_pulse_rate # Message doesn't fit in cluster # lock in latest cluster, create new cluster but also fill # in missing clusters in between old_pulse_cluster = _.push(pulse_clusters, latest_pulse) old_pulse_cluster = _.concat( zero_pulses(start_time=latest_pulse.get('time'), end_time=message_anchored_time, interval=interval), old_pulse_cluster) return _.push(old_pulse_cluster, _.assign({}, { 'rate': 1, 'time': message_anchored_time })), max_pulse_rate
def zero_pulses(start_time: datetime, end_time: datetime,
                interval: int) -> List[Dict]:
    """Build zero-rate pulses for each *interval*-minute slot strictly
    between *start_time* and *end_time*.

    Args:
        start_time: time of the last existing pulse.
        end_time: time of the new pulse (expected after *start_time*).
        interval: slot width in minutes.

    Returns:
        A list of ``{'rate': 0, 'time': ...}`` dicts, one per missing slot,
        in chronological order; empty when there is no gap to fill.
    """
    # BUG FIX: use total_seconds() — timedelta.seconds ignores whole days
    # (and is always non-negative, masking reversed inputs).
    gap_minutes = int((end_time - start_time).total_seconds()) // 60
    intervals_difference = (gap_minutes // interval) - 1
    if intervals_difference <= 0:
        return []
    # BUG FIX: the original pushed onto a fresh [] on every iteration, so
    # only the LAST zero pulse survived; accumulate all of them instead.
    return [{'rate': 0,
             'time': start_time + timedelta(minutes=interval * (x + 1))}
            for x in range(intervals_difference)]
def test_push(case, expected):
    """Parametrized check: pydash push(*case) must equal *expected*."""
    actual = _.push(*case)
    assert actual == expected
def remove_nones(pulses: List[Union[List[Dict], None]],
                 pulse: Union[List[Dict], None]) -> List[List[Dict]]:
    """Reducer step: append *pulse* to *pulses* unless it is None.

    Mutates and returns *pulses* (same semantics as the original pydash
    ``_.push``, which appends in place and returns its first argument).
    """
    if pulse is None:
        return pulses
    pulses.append(pulse)
    return pulses
def get_sinonimos(palavra):
    """Return the synonyms of *palavra*, scraping sinonimos.com.br on a miss.

    Results are cached in the module-level ``sinonimos`` dict (persisted via
    ``save_json``); words known to have no entry are cached in
    ``nao_consta['sinonimos']`` so they are not fetched again.

    Args:
        palavra: the word to look up (deburred/kebab-cased before use).

    Returns:
        dict of sense -> list of synonyms on success, [] when the word has
        no entry or the page could not be parsed.
    """
    palavra = deburr(kebab_case(palavra))  # normalize for the site's URL scheme
    base_url = "https://www.sinonimos.com.br/"
    list_no_occur = get(nao_consta, "sinonimos")
    if has(sinonimos, palavra):
        # cache hit: word already in the synonyms dictionary
        return get(sinonimos, palavra)
    if palavra in list_no_occur:
        # known miss: no synonyms were previously found for this word
        return []
    else:
        # new word: fetch its page, retrying until the request succeeds
        request_fail = True
        while request_fail:
            try:
                site = requests.get(base_url + palavra)
                request_fail = False
            except Exception as e:
                print(e)
                pass
        data = BeautifulSoup(site.content, 'html.parser')
        try:
            h1 = data.find('h1').getText()
        except Exception as e:
            # page has no <h1> at all: record the word as a known miss
            if palavra is not None:
                list_no_occur = push(list_no_occur, palavra)
                set_(nao_consta, "sinonimos", list_no_occur)
                save_json("nao_consta", nao_consta)
            return []
        if (h1 == "Página Não Encontrada"):
            # site's "page not found": record the word as a known miss
            if palavra is not None:
                list_no_occur = push(list_no_occur, palavra)
                set_(nao_consta, "sinonimos", list_no_occur)
                save_json("nao_consta", nao_consta)
            return []
        else:
            content = data.find('div', attrs={'id': 'content'})
            try:
                div = content.findAll('div', attrs={'class': 's-wrapper'})
            except Exception as e:
                print(e)
                # content area missing/unparseable: record as a known miss
                if palavra is not None:
                    list_no_occur = push(list_no_occur, palavra)
                    set_(nao_consta, "sinonimos", list_no_occur)
                    save_json("nao_consta", nao_consta)
                return []
            aux = 0  # sense counter, used to synthesize fallback keys
            for sentido in div:
                aux = aux + 1
                lista_sinonimos = []
                try:
                    try:
                        # sense heading shown on the page
                        key = lower_case(
                            sentido.find('div', attrs={
                                'class': 'sentido'
                            }).getText().strip(":"))
                    except Exception as e:
                        print(e)
                        # no heading: synthesize a numbered key
                        key = "sinonimos" + str(aux)
                        pass
                    values = sentido.findAll('a',
                                             attrs={'class': 'sinonimo'},
                                             text=True)
                    values2 = sentido.findAll('span')
                    # pydash union: concatenation with duplicates removed
                    all_values = union(values, values2)
                    for value in all_values:
                        lista_sinonimos.append(value.getText().strip(":"))
                    # store under "<word>.<sense>" (pydash deep path)
                    set_(sinonimos, palavra + "." + key, lista_sinonimos)
                    print("Sinônimo Salv@ no Dicionário")
                except Exception as e:
                    print("\nError:\n" + str(e))
                    return []
            save_json("sinonimos", sinonimos)
            return get(sinonimos, palavra)
def get_antonimos(palavra):
    """Return the antonyms of *palavra*, scraping antonimos.com.br on a miss.

    Results are cached in the module-level ``antonimos`` dict (persisted via
    ``save_json``); words known to have no entry are cached in
    ``nao_consta['antonimos']`` so they are not fetched again.

    Args:
        palavra: the word to look up (deburred/kebab-cased before use).

    Returns:
        dict of sense -> list of antonyms on success, [] when the word has
        no entry or the page could not be parsed.
    """
    palavra = deburr(kebab_case(palavra))  # normalize for the site's URL scheme
    base_url = "https://www.antonimos.com.br/"
    list_no_occur = get(nao_consta, "antonimos")
    if has(antonimos, palavra):
        # cache hit: word already in the antonyms dictionary
        return get(antonimos, palavra)
    if palavra in list_no_occur:
        # known miss: no antonyms were previously found for this word
        return []
    else:
        # new word: fetch its page, retrying until the request succeeds
        request_fail = True
        while request_fail:
            try:
                site = requests.get(base_url + palavra)
                request_fail = False
            except Exception as e:
                print(e)
                pass
        data = BeautifulSoup(site.content, 'html.parser')
        try:
            h1 = data.find('h1').getText()
        except Exception as e:
            # page has no <h1> at all: record the word as a known miss
            if palavra is not None:
                list_no_occur = push(list_no_occur, palavra)
                set_(nao_consta, "antonimos", list_no_occur)
                save_json("nao_consta", nao_consta)
            return []
        if (h1 == "Página Não Encontrada"):
            # site's "page not found": record the word as a known miss
            if palavra is not None:
                list_no_occur = push(list_no_occur, palavra)
                set_(nao_consta, "antonimos", list_no_occur)
                save_json("nao_consta", nao_consta)
            return []
        else:
            content = data.find('div', attrs={'id': 'content'})
            try:
                div = content.findAll('div', attrs={'class': 's-wrapper'})
            except Exception as e:
                print(e)
                # NOTE(review): unlike get_sinonimos there is no
                # ``return []`` here, so execution falls through with
                # ``div`` undefined and the for-loop below would raise
                # NameError — confirm whether an early return was intended.
                if palavra is not None:
                    list_no_occur = push(list_no_occur, palavra)
                    set_(nao_consta, "antonimos", list_no_occur)
                    save_json("nao_consta", nao_consta)
            aux = 0  # sense counter (incremented but otherwise unused here)
            for sentido in div:
                aux = aux + 1
                lista_antonimos = []
                try:
                    try:
                        # sense heading shown on the page
                        key = lower_case(
                            sentido.find('div', attrs={
                                'class': 'sentido'
                            }).getText().strip(":"))
                    except Exception as e:
                        # fallback: use the <strong> heading instead
                        key = lower_case(
                            sentido.find('strong').getText().strip("."))
                        pass
                    # antonym list text; [3:] drops leading characters of the
                    # paragraph (presumably a fixed prefix — TODO confirm)
                    p = sentido.find('p', attrs={
                        'class': 'antonimos'
                    }).getText()[3:]
                    try:
                        # re-decode to repair mojibake from the page encoding
                        p = str(p.encode('raw_unicode_escape').decode('utf-8'))
                    except Exception as e:
                        print(e)
                        pass
                    all_values = p.split(',')
                    for value in all_values:
                        lista_antonimos.append(value.strip(":").strip(' '))
                    # store under "<word>.<sense>" (pydash deep path)
                    set_(antonimos, palavra + "." + key, lista_antonimos)
                    print("Antônimo Salv@ no Dicionário")
                except Exception as e:
                    print("\nError:\n" + str(e))
                    return []
            save_json("antonimos", antonimos)
            return get(antonimos, palavra)