def text_to_bow_vector(some_text, features_dictionary):
    bow_vector = [0] * len(features_dictionary)
    tokens = preprocess_text(some_text)
    for token in tokens:
        feature_index = features_dictionary[token]
        bow_vector[feature_index] += 1
    return bow_vector, tokens
def value_for_keys_with_item(self, item, *keys):
    """
    Returns the concatenated preprocessed values for keys in the item dictionary.
    """
    val = ' '.join([item.get(k, '') for k in keys])
    return preprocessing.preprocess_text(val)
def search(query_words, document_names):
    search_results = []
    for document_name in document_names:
        page_path = config.INPUT_PATH + "/" + document_name
        with open(page_path, "r", encoding="utf-8") as f:
            # Extract text from page
            page_html = f.read()

        page_text = preprocessing.extract_text(page_html)

        # Process words
        page_words, page_indexes, page_strings = preprocessing.preprocess_text(
            page_text)

        # Find query matches
        frequency = 0
        indexes = []
        for page_word, page_index in zip(page_words, page_indexes):
            if page_word in query_words:
                indexes.append(page_index)
                frequency += 1

        if frequency > 0:
            # Get snippets
            snippets_str = searching.extract_snippets(indexes, page_strings)

            # Add to search results
            search_results.append((frequency, document_name, snippets_str))

    return search_results
def process_init(text):
    try:
        # Convert text to lower
        lower_text = text.lower()
        # Handle Emojis
        emojis_text = prep.handle_emojis(lower_text)
        # Cleaning text
        processed_text = prep.preprocess_text(emojis_text)
        # tokenize
        tokenizer = RegexpTokenizer(r'\w+')
        tokens = tokenizer.tokenize(processed_text)
        # remove remaining tokens that are not alphabetic
        tokens = [word for word in tokens if word.isalpha()]
        # filter out stop words
        stop_words = set(stopwords.words('english'))
        tokens = [w for w in tokens if w not in stop_words]
        # tokens = [word for word in tokens if len(word) > 1]
        return tokens
    except Exception:
        pass
def evaluate(text):
    with open('input_tokenizer.pickle', 'rb') as handle:
        input_tokenizer = pickle.load(handle)
    with open('output_tokenizer.pickle', 'rb') as handle:
        output_tokenizer = pickle.load(handle)

    input_vocab_size = len(input_tokenizer.word_index) + 1
    output_vocab_size = len(output_tokenizer.word_index) + 1

    text = preprocess_text(text)
    seq = input_tokenizer.texts_to_sequences([text])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(seq,
                                                           truncating='post',
                                                           padding='post')
    inputs = tf.convert_to_tensor(inputs)

    encoder = Encoder(input_vocab_size, constants.embedding_dim,
                      constants.units, constants.BATCH_SIZE)
    decoder = Decoder(output_vocab_size, constants.embedding_dim,
                      constants.units, constants.BATCH_SIZE)

    checkpoint_dir = './checkpoints'
    checkpoint = tf.train.Checkpoint(encoder=encoder, decoder=decoder)
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    enc_outputs, enc_hidden = encoder(inputs)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([output_tokenizer.word_index['<start>']], 0)

    result = beam_search(constants.beam_width, decoder, dec_input, dec_hidden,
                         enc_outputs, output_tokenizer.word_index['<end>'],
                         output_vocab_size)
    result = output_tokenizer.sequences_to_texts([result])
    print(result[0])
def term_based_search():
    query = 'best albums of 2019'
    query = preprocess_text(query)

    # total number of distinct documents in the index
    n = {url['doc_id'] for item in index.values() for url in item}
    n = len(n)

    # tf-idf weights for the query terms
    query_tfidf = [
        (max(float(0), 1 + log10(count))) * (log10(n / len(index[word])))
        for word, count in Counter(query).items()
    ]

    dict_docs = dict()
    for j, word in enumerate(query):
        for i in index[word]:
            if i['doc_id'] in dict_docs:
                dict_docs[i['doc_id']][j] = i['tfidf']
            else:
                dict_docs[i['doc_id']] = [0] * len(query)
                dict_docs[i['doc_id']][j] = i['tfidf']

    # rank documents by cosine similarity to the query vector
    search_results = dict()
    for key, vector in dict_docs.items():
        search_results[key] = 1 - spatial.distance.cosine(query_tfidf, vector)

    sorted_x = sorted(search_results.items(),
                      key=operator.itemgetter(1),
                      reverse=True)
    print(sorted_x[:20])
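# Illustrative note (added, not from the original source): term_based_search above
# and basic_search further down both read a module-level inverted index. Judging by
# how they access it, each word maps to a list of postings carrying a 'doc_id', a
# raw term 'count', and a precomputed 'tfidf' weight. A toy sketch of that assumed
# shape, under a hypothetical example_index name:
example_index = {
    'best':   [{'doc_id': 1, 'count': 3, 'tfidf': 0.42},
               {'doc_id': 7, 'count': 1, 'tfidf': 0.11}],
    'albums': [{'doc_id': 1, 'count': 2, 'tfidf': 0.35}],
    '2019':   [{'doc_id': 7, 'count': 5, 'tfidf': 0.58}],
}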
def text_to_bow(some_text):
    bow_dictionary = {}
    tokens = preprocess_text(some_text)
    for token in tokens:
        if token in bow_dictionary:
            bow_dictionary[token] += 1
        else:
            bow_dictionary[token] = 1
    return bow_dictionary
def tag_glossary(self, tag):
    """
    Returns the glossary text of a tag if set. Otherwise returns an empty string.
    """
    glossary_id = self.tag(tag)['glossary']
    if glossary_id:
        return preprocessing.preprocess_text(self.item(glossary_id)['text'])
    return ''
def create_features_dictionary(documents):
    features_dictionary = {}
    merged = " ".join(documents)
    tokens = preprocess_text(merged)
    index = 0
    for token in tokens:
        if token not in features_dictionary:
            features_dictionary[token] = index
            index += 1
    return features_dictionary, tokens
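# Usage sketch (added for illustration, not part of the original source): how
# create_features_dictionary and text_to_bow_vector (defined above) fit together.
# It assumes the same preprocess_text both functions call, and that every token of
# the new text already appears in the training documents, since text_to_bow_vector
# has no fallback for unseen tokens.
training_documents = [
    "Five fantastic fish flew off to find faraway functions.",
    "Maybe find another five fantastic fish?"
]
features_dictionary, _ = create_features_dictionary(training_documents)
bow_vector, _ = text_to_bow_vector(
    "Another five fish find another faraway fish.", features_dictionary)
# bow_vector holds one count per feature index, e.g. 2 at the index for "fish".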
def test():
    input_text = request.form["tweet"]
    input_button = request.form["button"]
    print(input_text)
    print(input_button)
    text = preprocess_text(input_text)
    pred = model.predict(text)
    return render_template("index.html", pred=str(pred))
def bot():
    incoming_msg = request.values.get('Body', '').lower()
    resp = MessagingResponse()
    msg = resp.message()
    responded = False
    hello_list = ['hello', 'hey', 'start', 'hi']
    global hello_flag

    # --------------------------
    # First Time Welcome Message
    # --------------------------
    if any(hello in incoming_msg for hello in hello_list) and hello_flag == 0:
        set_global_flag(value=1)
        hello_message = """_Hi, I am *COVID19 Mythbuster*_ 👋🏻
◻️ _In these crazy hyperconnected times, there is a lot of FAKE NEWS spreading about the NOVEL CORONAVIRUS._
◻️ _I Can Help You In Differentiating the Fake News From The Real News_ 📰
◻️ _All you need to do is send me the news you get to verify if it Real or not._
_It's that simple 😃 Try it for yourself, simply send me a News About COVID19 and I'll try to tell if it is Fake Or Real_ ✌🏻✅ """
        msg.body(hello_message)
        responded = True
    else:
        text = preprocess_text(incoming_msg)
        pred = model.predict(text)[0][0]
        output = ''
        if pred > 0.5:
            output = "The given news is real"
            responded = True
        elif pred < 0.5:
            output = "The given news is fake"
            responded = True
        msg.body(output)

    if not responded:
        msg.body(
            """That didn't quite work! Try some other text, or send a Hello to get started if you haven't already""")

    return str(resp)
def process_item(self, album, spider):
    dir_ = os.path.dirname(os.path.abspath(__file__))

    # sentiment and entity extraction
    blob = album['description']
    sentiment_blob = TextBlob(blob)
    nlp = spacy.load('en')
    entity_blob = nlp(blob)
    entities = {X.text for X in entity_blob.ents if X.label_ == 'PERSON'}
    entities = list(entities)

    sentiment_dict = {
        'sentiment': sentiment_blob.sentiment[0],
        'polarity': sentiment_blob.sentiment[1],
        'entities': entities
    }

    with open(
            dir_ + '/sentiment/' + spider.name + str(spider.count) +
            '_sentiment.json', 'w') as outfile:
        json.dump(sentiment_dict, outfile)

    # text preprocessing
    album['description'] = preprocessing.preprocess_text(album['description'])
    album['name'] = preprocessing.preprocess_text(album['name'],
                                                  specialchars=False,
                                                  stopwords=False,
                                                  stem=False)

    # write scraped data to json file
    write_to_json(spider.name + str(spider.count) + '.json', spider.count,
                  dict(album))
    return album
def basic_search():
    query = 'best albums of 2019'
    query = preprocess_text(query)

    dict_docs = dict()
    for j, word in enumerate(query):
        for i in index[word]:
            if i['doc_id'] in dict_docs:
                dict_docs[i['doc_id']][j] = i['count']
            else:
                dict_docs[i['doc_id']] = [0] * len(query)
                dict_docs[i['doc_id']][j] = i['count']

    # rank documents by total raw term count across the query words
    search_results = dict()
    for key, vector in dict_docs.items():
        search_results[key] = sum(vector)

    sorted_x = sorted(search_results.items(),
                      key=operator.itemgetter(1),
                      reverse=True)
    print(sorted_x[:20])
def main():
    preprocessed_text = preprocessing.preprocess_text(
        preprocessing.query_emailbody())
    documents_to_tokens = [
        tokenizer.tokenize(sentence) for sentence in preprocessed_text
    ]
    message_id = preprocessing.query_messageid()
    final_df = unsupervised_learning(preprocessed_text, documents_to_tokens,
                                     message_id)

    # Create a temporary table to store the results of unsupervised learning
    final_df.to_sql('unsupervised_temp', con, if_exists='replace')

    # Update folder_directory in emails table and delete the temporary table
    cur.executescript("""UPDATE emails_main
        SET folder_directory = (
            SELECT Topic
            FROM unsupervised_temp
            WHERE message_id = emails_main.message_id)
        WHERE emails_main.folder_directory IS NULL;
        DROP TABLE unsupervised_temp;""")
    con.commit()
import codecademylib3_seaborn
import pandas as pd
import numpy as np
from articles import articles
from preprocessing import preprocess_text

# import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

# view article
#print(articles[4])

# preprocess articles
processed_articles = []
for i in range(0, 10):
    processed_articles.append(preprocess_text(articles[i]))
#print(processed_articles[4])

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(processed_articles)

# convert counts to tf-idf
transformer = TfidfTransformer(norm=None)
tfidf_scores_transformed = transformer.fit_transform(counts)

# initialize and fit TfidfVectorizer
vectorizer = TfidfVectorizer(norm=None)
tfidf_scores = vectorizer.fit_transform(processed_articles)

# check if tf-idf scores are equal
import codecademylib3_seaborn
import pandas as pd
import numpy as np
from articles import articles
from preprocessing import preprocess_text
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

# check one of the articles
#print(articles[0])

# preprocess articles
processed_articles = [preprocess_text(article) for article in articles]
print(processed_articles[0])

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(processed_articles)

# convert counts to tf-idf
transformer = TfidfTransformer(norm=None)
tfidf_scores_transformed = transformer.fit_transform(counts)

# initialize and fit TfidfVectorizer
vectorizer = TfidfVectorizer(norm=None)
tfidf_scores = vectorizer.fit_transform(processed_articles)

# check if tf-idf scores are equal
if np.allclose(tfidf_scores_transformed.todense(), tfidf_scores.todense()):
    print(pd.DataFrame({'Are the tf-idf scores the same?': ['YES']}))
else:
def get_chapter_cloud(chapters, chapter):
    chapter_cloud_data = []
    unique_words = set(chapter)
    for word in unique_words:
        weight = utility.tf_idf(word, chapters, chapter)
        chapter_cloud_data.append((word, int(weight * 100)))
    weight = lambda element: element[1]
    chapter_cloud_data.sort(key=weight, reverse=True)
    return chapter_cloud_data


if __name__ == '__main__':
    book = utility.get_text_file_as_list('shrek.txt')
    chapters = utility.split_by_delimiter(book, "#" * 10)
    preprocessed_chapters = [preprocess_text(chapter) for chapter in chapters]
    cloud_data = prepare_word_cloud_data(preprocessed_chapters)
    for i, data in enumerate(cloud_data):
        wc = WordCloud(background_color="white",
                       max_words=2000,
                       contour_width=3,
                       contour_color='steelblue')
        wc.generate_from_frequencies(dict(data[5:]))
        wc.to_file(f'clouds/shrek_cloud{i}.png')

    # subexercise 5
    preprocessed_book = preprocess_text(book)
    cloud = get_chapter_cloud(preprocessed_book, preprocessed_book)
    wc = WordCloud(background_color="white",
                   max_words=2000,
                   contour_width=3,
                   contour_color='steelblue')
    wc.generate_from_frequencies(dict(cloud[15:]))
    wc.to_file('clouds/book_tf_idf.png')
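# Illustrative sketch (an assumption, not the project's actual utility module): a
# tf-idf helper with the same call shape as utility.tf_idf(word, chapters, chapter)
# used in get_chapter_cloud, where chapters is a list of token lists and chapter is
# one of them.
from math import log10

def tf_idf(word, documents, document):
    # term frequency: how often the word occurs in this document
    tf = document.count(word) / len(document)
    # inverse document frequency: rarer across documents -> higher weight
    containing = sum(1 for doc in documents if word in doc)
    idf = log10(len(documents) / containing) if containing else 0.0
    return tf * idf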
import pandas as pd
import numpy as np
from articles import articles
from preprocessing import preprocess_text

# Note: this code was built with a Codecademy course, with modifications to pull the
# top n words instead of the single top word the course used.

# import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

# preprocess articles
processed_articles = [preprocess_text(story) for story in articles]

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(processed_articles)

# convert counts to tf-idf
transformer = TfidfTransformer(norm=None)
tfidf_scores_transformed = transformer.fit_transform(counts)

# initialize and fit TfidfVectorizer
vectorizer = TfidfVectorizer(norm=None)
tfidf_scores = vectorizer.fit_transform(processed_articles)

# check if tf-idf scores are equal
if np.allclose(tfidf_scores_transformed.todense(), tfidf_scores.todense()):
    print(pd.DataFrame({'Are the tf-idf scores the same?': ['YES']}))
else:
    print(
        pd.DataFrame(
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from preprocessing import preprocess_text

response_a = "Every dress style is cut from a polyester blend for a strechy fit."
response_b = "The 'Elosie' dress runs large. I suggest you take your regular size or smaller."
response_c = "The 'Elosie' dress comes in green, lavender, and orange."
user_message = "Hello! What is the fit of the 'Elosie' dress? My shoulders are broad, so I often size up for a comfortable fit. Do dress sizes run large or small?"

documents = [response_a, response_b, response_c, user_message]

# preprocess responses and user_message
processed_docs = [preprocess_text(doc) for doc in documents]

# create tfidf vectorizer
vectorizer = TfidfVectorizer()

# fit and transform vectorizer on processed docs
tfidf_vectors = vectorizer.fit_transform(processed_docs)

# compute cosine similarity between the user message tf-idf vector and the response tf-idf vectors
cosine_similarities = cosine_similarity(tfidf_vectors[-1], tfidf_vectors)

# get the index of the most similar response to the user message
# (the highest score is the user message itself, so take the second-highest)
similar_response_index = cosine_similarities.argsort()[0][-2]

best_response = documents[similar_response_index]
print(best_response)
import os  # needed for the path handling below

from torchtext_sentiment import analyse_sentiments
from utils import get_model_name
# preprocess_text is assumed to be imported from the project's preprocessing module

# INPUTS
############
PROCESS_DATASETS = False
CREATE_EMBEDDINGS = False
TRAINING_MODULE = True
training_mode = True

if PROCESS_DATASETS:
    dataset_path = os.path.normpath(os.getcwd() + os.sep + os.pardir)
    dataset_path = os.path.join(dataset_path, "data")
    dataset_path = os.path.join(dataset_path,
                                "training.1600000.processed.noemoticon.csv")
    preprocess_text(dataset_path, stem=False)

if CREATE_EMBEDDINGS:
    # TODO CREATE OWN EMBEDDINGS
    embedding_params = [{
        'min_count': [1],  # pick a constant value here
        'max_vocab_size': [1000e3],  # pick a constant value here, e.g. 50k
        'window_size': [7],  # test [5, 10]? for skip-gram usually around 10, for CBOW around 5
        'vector_size': [100],  # test [10, 100, 300]?
        'noise_words': [20],  # for large datasets between 2-5, pick one
        'use_skip_gram': [1],  # 1 for skip-gram, 0 for CBOW, test with both?
        'cbow_mean': [0],  # if using cbow
        'w2v_iters': [10]  # is this enough?
    }]
Not one of all the purple host
Who took the flag to-day
Can tell the definition,
So clear, of victory,
As he, defeated, dying,
On whose forbidden ear
The distant strains of triumph
Break, agonized and clear!'''

# define clear_count:
clear_count = 2

# preprocess text
processed_poem = preprocess_text(poem)

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
term_frequencies = vectorizer.fit_transform([processed_poem])

# get vocabulary of terms
feature_names = vectorizer.get_feature_names()
# print(feature_names)

# create pandas DataFrame with term frequencies
try:
    df_term_frequencies = pd.DataFrame(term_frequencies.T.todense(),
                                       index=feature_names,
                                       columns=['Term Frequency'])
    print(df_term_frequencies)
import hickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import numpy as np
from preprocessing import preprocess_text

X_train, X_test, y_train, y_test, vocab_size, embedding_matrix, maxlen, tokenizer = hickle.load(
    "preprocessed.hickle")

test_texts = [
    "f*****g shit movie so bad omg why is it this shit dont like",
    "i really liked the style of the movie, moreover the style is really fresh and nice"
]

# preprocess the raw review texts before tokenizing
for i, text in enumerate(test_texts):
    test_texts[i] = preprocess_text(text)

tokenized = tokenizer.texts_to_sequences(test_texts)
tokenized = pad_sequences(tokenized, padding='post', maxlen=maxlen)

model = load_model("model.h5")
model.summary()

predict = model.predict(tokenized)
print(predict)
    return search_results


if __name__ == "__main__":
    # Parse parameters
    if len(sys.argv) < 2:
        print("Error: Missing search parameter!")
        sys.exit(1)
    query_text = sys.argv[1]

    # Use perf_counter instead of process_time when multiprocessing
    time_start = time.perf_counter()  # Start timer

    # Preprocess query
    query_words, _, _ = preprocessing.preprocess_text(query_text)
    query_words = searching.remove_duplicates(query_words)

    search_results = []

    # Get list of all documents
    document_names = []
    for site in config.INPUT_SITES:
        site_path = config.INPUT_PATH + "/" + site
        # Add spaces to align progress bars
        padding = (max([len(x) for x in config.INPUT_SITES]) - len(site)) * " "
        for page in os.listdir(site_path):
            # Only process html files with the same name as site
            if page.startswith(site) and page.endswith(".html"):
                document_names.append(site + "/" + page)
import codecademylib3_seaborn
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from raven import the_raven_stanzas
from preprocessing import preprocess_text

# view first stanza
print(the_raven_stanzas[0])

vectorizer = TfidfVectorizer(norm=None)

# preprocess documents
processed_stanzas = [preprocess_text(stanza) for stanza in the_raven_stanzas]

# initialize and fit TfidfVectorizer
tfidf_scores = vectorizer.fit_transform(processed_stanzas)

# get vocabulary of terms
feature_names = vectorizer.get_feature_names()

# get stanza index
stanza_index = [f"Stanza {i+1}" for i in range(len(the_raven_stanzas))]

# create pandas DataFrame with tf-idf scores
try:
    df_tf_idf = pd.DataFrame(tfidf_scores.T.todense(),
                             index=feature_names,
                             columns=stanza_index)
    print(df_tf_idf)
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from preprocessing import preprocess_text
from poems import poems

# preprocess text
processed_poems = [preprocess_text(poem) for poem in poems]

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
term_frequencies = vectorizer.fit_transform(processed_poems)

# get vocabulary of terms
feature_names = vectorizer.get_feature_names()

# get corpus index
corpus_index = [f"Poem {i+1}" for i in range(len(poems))]

# create pandas DataFrame with term frequencies
df_term_frequencies = pd.DataFrame(term_frequencies.T.todense(),
                                   index=feature_names,
                                   columns=corpus_index)
import codecademylib3_seaborn
import pandas as pd
import numpy as np
from articles import articles
from preprocessing import preprocess_text

# import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, TfidfTransformer

# view article
print(articles[0])

# preprocess articles
processed_articles = [preprocess_text(document) for document in articles]

# initialize and fit CountVectorizer
vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(processed_articles)

# convert counts to tf-idf
transformer = TfidfTransformer(norm=None)
tfidf_scores_transformed = transformer.fit_transform(counts)

# initialize and fit TfidfVectorizer
vectorizer = TfidfVectorizer(norm=None)
tfidf_scores = vectorizer.fit_transform(processed_articles)

# check if tf-idf scores are equal
if np.allclose(tfidf_scores_transformed.todense(), tfidf_scores.todense()):
    print(pd.DataFrame({'Are the tf-idf scores the same?': ['YES']}))
else:
import nltk, re
from sherlock_holmes import bohemia_ch1, bohemia_ch2, bohemia_ch3, boscombe_ch1, boscombe_ch2, boscombe_ch3
from preprocessing import preprocess_text
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# preparing the text
corpus = [
    bohemia_ch1, bohemia_ch2, bohemia_ch3, boscombe_ch1, boscombe_ch2,
    boscombe_ch3
]
preprocessed_corpus = [preprocess_text(chapter) for chapter in corpus]

# Update stop_list:
stop_list = [
    'man', 'say', 'upon', 'could', 'one', 'see', 'think', 'know', 'come', 'yes'
]


# filtering topics for stop words
def filter_out_stop_words(corpus):
    no_stops_corpus = []
    for chapter in corpus:
        no_stops_chapter = " ".join(
            [word for word in chapter.split(" ") if word not in stop_list])
        no_stops_corpus.append(no_stops_chapter)
    return no_stops_corpus


filtered_for_stops = filter_out_stop_words(preprocessed_corpus)
import codecademylib3_seaborn
from preprocessing import preprocess_text
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# sample documents
document_1 = "This is a sample sentence!"
document_2 = "This is my second sentence."
document_3 = "Is this my third sentence?"

# corpus of documents
corpus = [document_1, document_2, document_3]

# preprocess documents
processed_corpus = [preprocess_text(doc) for doc in corpus]

# initialize and fit TfidfVectorizer
vectorizer = TfidfVectorizer(norm=None)
tf_idf_scores = vectorizer.fit_transform(processed_corpus)

# get vocabulary of terms
feature_names = vectorizer.get_feature_names()
corpus_index = [n for n in processed_corpus]

# create pandas DataFrame with tf-idf scores
df_tf_idf = pd.DataFrame(tf_idf_scores.T.todense(),
                         index=feature_names,
                         columns=corpus_index)
print(df_tf_idf)
    for folder in os.listdir(text_dir):
        all_text[folder] = []
        for file_ in os.listdir(text_dir / folder):
            with open(text_dir / folder / file_) as f:
                all_text[folder].append(f.read().strip())
    return all_text


if __name__ == "__main__":
    # reading files and getting the data as a dict
    all_text_list = get_all_text()

    # cleaning and preprocessing the data
    all_text_list_clean = {}
    for i, class_ in enumerate(all_text_list):
        all_text_list_clean[class_] = []
        for j, _ in enumerate(all_text_list[class_]):
            all_text_list_clean[class_].append(
                preprocess_text(all_text_list[class_][j]))

    # saving text as a minimally processed doc
    all_text = ''
    for class_ in all_text_list_clean:
        for value in all_text_list_clean[class_]:
            if value.strip() != '':
                all_text += f'__label__{class_} {value}\n'
                # all_text += f'{value}\n'

    with open('./data/raw_text.txt', 'w') as f:
        f.write(all_text.strip())
# {key=word: val=frequency}
bow_vectorizer = CountVectorizer()

# Define friends_vectors:
friends_vectors = bow_vectorizer.fit_transform(friends_docs)

# Define friends_classifier:
friends_classifier = MultinomialNB()

# train test split
X_train, X_test, y_train, y_test = train_test_split(friends_vectors,
                                                    friends_labels,
                                                    test_size=0.33,
                                                    random_state=42)

# never-before-seen message for which the classifier will predict the sender
# mystery_message = "big boi manipulation lol"
mystery_message = '''lmao well since you're up, ill update and say i will never not buy anything off of amazon ever again literally found the same shoe for 40 dollars less than their sale?'''

mystery_message = preprocess_text(mystery_message)

# Define mystery_vector:
mystery_vector = bow_vectorizer.transform([mystery_message])

# Train the classifier:
# friends_classifier.fit(friends_vectors, friends_labels)
friends_classifier.fit(X_train, y_train)

# Change prediction back to a name:
predictions = friends_classifier.predict(mystery_vector)
confidence = friends_classifier.predict_proba(mystery_vector)  # technically the probability
score = friends_classifier.score(X_test, y_test)
score1 = friends_classifier.score(X_train, y_train)
from preprocessing import preprocess_text
from nltk.util import ngrams
from collections import Counter

text = "It's exciting to watch flying fish after a hard day's work. I don't know why some fish prefer flying and other fish would rather swim. It seems like the fish just woke up one day and decided, 'hey, today is the day to fly away.'"
tokens = preprocess_text(text)

# Bigram approach:
bigrams_prepped = ngrams(tokens, 2)
bigrams = Counter(bigrams_prepped)
print(
    "Three most frequent word sequences and the number of occurrences according to Bigrams:"
)
print(bigrams.most_common(3))

# Bag-of-Words approach:
# Define bag_of_words here:
bag_of_words = Counter(tokens)
most_common_three = bag_of_words.most_common(3)
print(
    "\nThree most frequent words and number of occurrences according to Bag-of-Words:"
)
print(bag_of_words)
print(most_common_three)
    word_occurence_indices = [
        index for index, element in enumerate(book) if element.strip() == word
    ]
    return [
        book[index + 2] for index in word_occurence_indices
        if index + 2 < len(book)
    ]


def generate_random_paragraph(start_word, words, length=50):
    next_word = words[start_word][random.randint(0, len(words[start_word]) - 1)]
    paragraph = [start_word, next_word]
    current_word = next_word
    for _ in range(length - 2):
        random_index = random.randint(0, len(words[current_word]) - 1)
        next_word = words[current_word][random_index]
        current_word = next_word
        paragraph.append(next_word)
    return ' '.join(paragraph)


if __name__ == '__main__':
    book = utility.get_text_file_as_list('shrek.txt')
    book = preprocessing.preprocess_text(book)
    words_with_successors = get_words_with_most_common_successors(book)
    paragraph = generate_random_paragraph('donkey', words_with_successors)
    pdb.set_trace()