def getKeywords(pdfFile, Occur):
    tikaurl = tika_obo.getTikaAddress()
    parsed = parser.from_file(pdfFile, tikaurl)
    metadata = parsed["metadata"]
    doccontent = parsed["content"]
    fullwordlist = obo.stripNonAlphaNum(doccontent)
    wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)
    dictionary = obo.wordListToFreqDict(wordlist)
    sorteddict = obo.sortFreqDict(dictionary)
    count = 0
    keywords = []
    shortkey = []
    maxoccur = Occur
    for s in sorteddict:
        numocc = int(s[0])
        word = s[1].encode('utf-8')
        if numocc > maxoccur:
            keyword = {word: str(numocc)}
            keywords.append(keyword)
            if len(word) > 6:
                shortkey.append(word.lower())
        count = count + 1
    if Occur > 0:
        return shortkey
    return keywords
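# Note: every snippet in this collection leans on the Programming
# Historian's obo.py helpers. For readers without that file, here is a
# minimal sketch of the four helpers used above, following the published
# tutorial's conventions (an assumption, not necessarily this repo's
# exact copy):
import re

def stripNonAlphaNum(text):
    # split into words on any run of non-alphanumeric characters
    return re.compile(r'\W+', re.UNICODE).split(text)

def removeStopwords(wordlist, stopwords):
    # drop words that appear in the stopword list
    return [w for w in wordlist if w not in stopwords]

def wordListToFreqDict(wordlist):
    # map each word to its number of occurrences
    return dict([(word, wordlist.count(word)) for word in wordlist])

def sortFreqDict(freqdict):
    # return (count, word) pairs, most frequent first
    aux = [(freqdict[key], key) for key in freqdict]
    aux.sort()
    aux.reverse()
    return aux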
def __iter__(self):
    for year in years:
        fname = 'arXiv-meta-{}.xml'.format(year)
        #path = os.path.dirname(os.getcwd()) + '\\DATA\\' + fname
        path = os.path.dirname(os.getcwd()) + '/DATA/SORTED/' + fname
        for event, elem in ET.iterparse(path):
            if elem.tag == 'description':
                desc = obo.removeStopwords(
                    obo.stripNonAlphaNum(elem.text), obo.stopwords)
                yield dictionary.doc2bow(desc)
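# The __iter__ above belongs to a streaming-corpus class whose definition
# is not shown here (ArxivCorpus below is a hypothetical name for it).
# Any object exposing such an __iter__ can be handed to gensim directly,
# so no single year ever has to fit in memory at once, roughly:
from gensim import corpora, models

corpus = ArxivCorpus()                          # hypothetical wrapper class
tfidf = models.TfidfModel(corpus)               # one streaming pass
corpora.MmCorpus.serialize('arxiv.mm', corpus)  # or persist to disk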
def getPositive(tf):
    file = open(tf, encoding="Latin-1")
    text = file.read()
    file.close()
    fullwordlist = obo.stripNonAlphaNum(text)
    wordlist = obo.rabinKarp1(fullwordlist, obo.stopwords, 101)
    wordlist1 = obo.rabinKarp2(wordlist, posiwordlist, 101)
    return len(wordlist1)
def PositiveOrNegative(tf=''):
    file = open(tf, encoding="Latin-1")
    text = file.read()
    file.close()
    fullwordlist = obo.stripNonAlphaNum(text)
    wordlist = obo.rabinKarp1(fullwordlist, obo.stopwords, 101)
    wordlist1 = obo.rabinKarp2(wordlist, posiwordlist, 101)
    wordlist2 = obo.rabinKarp2(wordlist, negawordlist, 101)
    total = len(wordlist1) + len(wordlist2)
    if len(wordlist1) > len(wordlist2):
        return ['Positive', len(wordlist1) / total]
    else:
        # negative share of the sentiment-bearing words
        return ['Negative', len(wordlist2) / total]
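# rabinKarp1 and rabinKarp2 are project-specific additions to obo that are
# not shown in this collection. Judging from the call sites, rabinKarp1
# drops stopwords and rabinKarp2 keeps words found in a sentiment list,
# both via Rabin-Karp hashing with a small prime modulus (101). A minimal
# sketch under that reading, with hypothetical names:
def _rk_hash(word, q):
    # polynomial rolling hash of the whole word, modulo a small prime q
    h = 0
    for ch in word:
        h = (h * 256 + ord(ch)) % q
    return h

def rk_filter(words, targets, q=101, keep=True):
    # cheap hash test first, then an exact comparison to rule out the
    # collisions that a modulus as small as 101 guarantees
    target_hashes = {_rk_hash(t, q) for t in targets}
    target_set = set(targets)
    out = []
    for w in words:
        hit = _rk_hash(w, q) in target_hashes and w in target_set
        if hit == keep:
            out.append(w)
    return out

# rabinKarp1(words, obo.stopwords, 101) ~ rk_filter(words, obo.stopwords, keep=False)
# rabinKarp2(words, posiwordlist, 101)  ~ rk_filter(words, posiwordlist, keep=True)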
def graphAndEvaluation(tf=''):
    file = open(tf, encoding="Latin-1")
    text = file.read()
    file.close()
    fullwordlist = obo.stripNonAlphaNum(text)
    wordlist = obo.rabinKarp1(fullwordlist, obo.stopwords, 101)
    wordlist1 = obo.rabinKarp2(wordlist, posiwordlist, 101)
    wordlist2 = obo.rabinKarp2(wordlist, negawordlist, 101)
    wordString = listToString(wordlist)
    dictionary = obo.wordListToFreqDict(wordlist)
    dictionaryposi = obo.wordListToFreqDict(wordlist1)
    dictionarynega = obo.wordListToFreqDict(wordlist2)
    sorteddict = obo.sortFreqDict(dictionary)
    sorteddictposi = obo.sortFreqDict(dictionaryposi)
    sorteddictnega = obo.sortFreqDict(dictionarynega)
    # for s in sorteddictposi: print(str(s))
    N = 100000
    t = list(dictionary.keys())
    y = list(dictionary.values())
    fig1 = go.Figure(data=go.Scatter(x=t, y=y, mode='markers'))
    fig1.update_layout(title={
        'text': tf + " Word Counts",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
    fig1.show()
    x1 = wordlist1
    x2 = wordlist2
    fig = go.Figure()
    fig.update_layout(title={
        'text': tf + " Negative and Positive Histogram",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
    fig.add_trace(go.Histogram(histfunc="sum", x=x1, name="Positive Word"))
    fig.add_trace(go.Histogram(histfunc="sum", x=x2, name="Negative Word"))
    fig.show()
def count():
    form = WordForm()
    if form.validate_on_submit():
        url = form.url.data
        response = requests.get(url)
        html = response.content.decode("utf-8")
        text = obo.stripTags(html).lower()
        fullwordlist = obo.stripNonAlphaNum(text)
        wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)
        dictionary = obo.wordListToFreqDict(wordlist)
        sorteddict = obo.sortFreqDict(dictionary)
        for s in sorteddict[:21]:
            flash(str(s))
        return redirect(url_for('index'))
    return render_template('count.html', title='Word Count Application', form=form)
def hitString(limit, url):
    response = urllib2.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "lxml")
    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out
    # get text
    text = soup.get_text()
    text = text.lower()
    '''# break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # break multi-headlines into a line each
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    # drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)'''
    #text = text.lower()
    fullwordlist = obo.stripNonAlphaNum(text)
    wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)
    dictionary = obo.wordListToFreqDict(wordlist)
    sorteddict = obo.sortFreqDict(dictionary)
    count = 0
    global retStr
    for s in sorteddict:
        # pull the word out of the (count, word) tuple and strip punctuation
        mys = str(s[1:])
        mys2 = mys.strip("(,)")
        mys3 = mys2[1:]
        mys4 = str(mys3.strip("''"))
        if re.match("^-?[0-9]+$", mys4):
            continue
        count += 1
        #print mys4
        retStr += str(mys4) + ' '
        if count == limit:
            break
    return retStr
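# hitString (and html-to-freq.py further down) targets Python 2's urllib2.
# On Python 3, where urllib2 was folded into urllib.request, the
# equivalent fetch would be:
from urllib.request import urlopen

html = urlopen(url).read().decode('utf-8')  # url as in hitString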
title = titles[rec]
text = title + desc
date = datetime.strptime(dates[rec], '%Y-%m-%d')
Q = int(ceil(date.month / 3.) - 1)
ind = 4 * (date.year - year0) + Q
NRecQuarter[ind] += 1
fulltextQuarter[Q] = fulltextQuarter[Q] + text

NRecRunning = NRecRunning + NRecYear[x]
for q in range(4):
    desc_fullwordlist = obo.stripNonAlphaNum(fulltextQuarter[q])
    desc_wordlist = obo.removeStopwords(desc_fullwordlist, obo.stopwords)
    desc_dictionary = obo.wordListToFreqDict(desc_wordlist)
    desc_sorteddict = obo.sortFreqDict(desc_dictionary)
    topWords.append(desc_sorteddict[:5000])
    print('Year: {}; Quarter: Q{}; Num. entries: {}'.format(
        years[x], q + 1, NRecQuarter[4 * (date.year - year0) + q]))
    #for s in desc_sorteddict[:10]: print(str(s))
    #print('\n')
print('\n')

#################################################################################################
#################################################################################################
## Pickle?
linewords = line.split()
resultwords = [word1 for word1 in linewords if word1 not in stopword]
result = ' '.join(resultwords)
#line = line.lower().replace(stopword, "")
line = regexnumber.sub('', result)
stopwords.close()
stemmout.write(result + '\n')
stemmout.close()
dataset.close()

dataset = open('stemm-result/' + title + '-steem.txt', 'r')
fullwordlist = obo.stripNonAlphaNum(dataset.read().lower())
wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)
dictionary = obo.wordListToFreqDict(wordlist)
sorteddict = obo.sortFreqDict(dictionary)
dataset.close()

dataset = open('stemm-result/' + title + '-steem.txt', 'r')
lines = []
regex = re.compile(r'[\n\r\t]')
for sentence in dataset:
    line = regex.sub('', sentence)
    lines.append(str(line))
i = 0
                    help='Number of digits before and after the word',
                    type=int,
                    required=True)
args = parser.parse_args()

statusCode = 0
try:
    r = requests.get(args.url)  # issue the GET request to the page
    statusCode = r.status_code
except:
    print("Malformed URL; it must look like http://www.google.com")

if statusCode == 200:
    text = obo.stripTags(r.text)  # strip the tags and lowercase
    fullwordlist = obo.stripNonAlphaNum(text)  # drop non-alphanumeric characters
    if args.stopwords:
        fullwordlist = obo.removeStopwords(
            fullwordlist, args.stopwords)  # remove common words for the given language
    if args.long:
        fullwordlist = obo.excludeTwo(
            fullwordlist, args.long)  # drop words shorter than two characters
    dictionary = obo.wordListToFreqDict(
        fullwordlist)  # returns a word -> frequency dictionary
    sorteddict = obo.sortFreqDict(
        dictionary)  # sorts words by frequency (we get back a list of lists)
    if args.tipo == 'simple':
        obo.makePassfile(sorteddict,
from gensim import corpora
import obo

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

## specify range of data
years = range(1992, 2017)
fileName = 'fullDictionary.dict'

## build dictionary from stream, clean up as we go
print('\nGenerating dictionary for years {} to {} ... '.format(
    years[0], years[-1]))
doc_stream = (
    obo.removeStopwords(obo.stripNonAlphaNum(elem.text), obo.stopwords)
    for year in years
    for event, elem in ET.iterparse('RAW/arXiv-meta-{}.xml'.format(year))
    if elem.tag == 'description')
dictionary = corpora.Dictionary(doc_stream)
print('Finished!')
print(dictionary)

## save to file
dictionary.save(fileName)
print('Dictionary saved as {}'.format(fileName))

''' ## some output checks
i = -1
for d in dictionary.token2id:
    i += 1
    if i < 20:
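# One caveat this script does not address: iterparse keeps the parsed tree
# in memory unless elements are cleared, which can matter for multi-year
# dumps (an assumption about file size, not something the script checks).
# A clearing variant of the same stream:
def stream_descriptions(path):
    for event, elem in ET.iterparse(path):
        if elem.tag == 'description':
            yield obo.removeStopwords(
                obo.stripNonAlphaNum(elem.text), obo.stopwords)
        elem.clear()  # free each element once processed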
desc = descriptions[rec]
title = titles[rec]
text = title + desc
date = datetime.strptime(dates[rec], '%Y-%m-%d')
Q = int(ceil(date.month / 3.) - 1)
ind = 4 * (date.year - year0) + Q
NRecQuarter[ind] += 1
fulltextQuarter[Q] = fulltextQuarter[Q] + text

NRecRunning = NRecRunning + NRecYear[x]
for q in range(4):
    desc_fullwordlist = obo.stripNonAlphaNum(fulltextQuarter[q])
    desc_wordlist = obo.removeStopwords(desc_fullwordlist, obo.stopwords)
    desc_dictionary = obo.wordListToFreqDict(desc_wordlist)
    desc_sorteddict = obo.sortFreqDict(desc_dictionary)
    topWords.append(desc_sorteddict[:5000])
    print('Year: {}; Quarter: Q{}; Num. entries: {}'.format(
        years[x], q + 1, NRecQuarter[4 * (date.year - year0) + q]))
    #for s in desc_sorteddict[:10]: print(str(s))
    #print('\n')
print('\n')

#################################################################################################
#################################################################################################
## Pickle?
import requests, obo

url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'

pagetext = requests.get(url)
HTML = pagetext.text
text = obo.stripTags(HTML).lower()  # make lower case
wordlist = obo.stripNonAlphaNum(text)  # convert to list of words, no punctuation
dictionary = obo.wordListToFreqDict(wordlist)  # add words, counts to dictionary
sorteddict = obo.sortFreqDict(dictionary)  # sort word list by frequency

for s in sorteddict:
    print(str(s))
def hello_world():
    if request.method == "GET":
        return redirect("/app/index.html")
    else:
        pprint.pprint(request.form)
        pprint.pprint(request.files)

        #Language check
        if request.form['language'] not in ['english', 'dutch']:
            return jsonify(status='error', message="Invalid language!")

        #Input normalization
        if request.form['upload_option'] == 'text_field':
            input_text = request.form['upload_textarea']
        elif request.form['upload_option'] == 'url':
            page_text = requests.get(request.form['upload_url']).text
            soup = BeautifulSoup(page_text, "html.parser")
            input_text = soup.text
        elif request.form['upload_option'] == 'file':
            input_text = UnicodeDammit(
                request.files.get('upload_file').read()).unicode_markup

        #Stemmer selection
        if request.form['stemmer'] == 'no_stemmer':
            stemmer = None
        elif request.form['stemmer'] == 'porter':
            if request.form['language'] != 'english':
                return jsonify(status='error',
                               message="Invalid language for stemmer porter!")
            stemmer = PorterStemmer()
        elif request.form['stemmer'] == 'snowball':
            stemmer = SnowballStemmer(request.form['language'])
        else:
            return jsonify(status='error', message="Invalid stemmer!")

        #Lemmatizer selection
        if request.form['lemmatizer'] == 'lemmatizer_off':
            lemmatizer = None
        elif request.form['language'] == 'english':
            lemmatizer = lemmatizer_en
        else:
            lemmatizer = lemmatizer_nl

        #Stopwords selection
        if request.form['stopwords'] == 'no_stopwords':
            stopwords = None
        elif request.form['stopwords'] == 'our_stopwords':
            stopwords = obo.stopwords
        elif request.form['stopwords'] == 'custom_stopwords':
            custom_stopword_text = UnicodeDammit(
                request.files.get('custom_stopword_file').read()).unicode_markup
            stopwords = obo.stripNonAlphaNum(custom_stopword_text)

        #Process the text
        input_text_word_count = 0
        resulting_text = ""
        final_wordlist = []
        for word_type, word in text_processor.parse_text(input_text):
            if word_type == "non-word":
                resulting_text += word
            else:
                input_text_word_count += 1
                processed_word = word
                if stemmer:
                    processed_word = stemmer.stem(processed_word)
                if lemmatizer:
                    processed_word = lemmatizer(processed_word)
                if not stopwords or processed_word not in stopwords:
                    if request.form['exclude_vowels'] == 'exclude_vowels_yes':
                        if request.form['language'] == 'english':
                            regex = re_vowel_en
                        else:
                            regex = re_vowel_nl
                        processed_word = regex.sub("", processed_word)
                    resulting_text += processed_word
                    final_wordlist.append(processed_word)

        dictionary = obo.wordListToFreqDict(final_wordlist)
        sorteddict = obo.sortFreqDict(dictionary)

        ignore_results_amount = int(request.form['ignore_results_amount'])
        if ignore_results_amount > 0:
            initial_index = ignore_results_amount
            ignored_words = [word for rank, word in sorteddict[:initial_index]]
            sorteddict = sorteddict[initial_index:]
            new_text = ""
            new_wordlist = []
            for word_type, word in text_processor.parse_text(resulting_text):
                if word_type == "non-word":
                    new_text += word
                elif word not in ignored_words:
                    new_text += word
                    new_wordlist.append(word)
            resulting_text = new_text
            final_wordlist = new_wordlist
        else:
            initial_index = 0

        #Do the math!
        input_text_char_count = len(input_text)
        word_count = len(final_wordlist)
        distinct_words_count = len(sorteddict)
        words = []
        frequencies = []
        word_cloud = []
        for frequency, word in sorteddict:
            words.append(word)
            frequencies.append(frequency)
            word_cloud.append([word, frequency])
        acum_perc = Decimal(0)
        percentages = []
        acum_perc_list = []
        for freq in frequencies:
            perc = Decimal((freq * 100.0) / word_count)
            percentages.append(round(perc, 2))
            acum_perc += perc
            acum_perc_list.append(round(acum_perc, 2))
        logarithms = []
        for i in range(len(sorteddict)):
            logarithms.append((math.log(i + 1), math.log(frequencies[i])))

        #Calculate Linear regression
        #http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.lstsq.html#numpy.linalg.lstsq
        x = numpy.array([math.log(f) for f in frequencies])
        y = numpy.array(
            [math.log(rank) for rank in range(1, distinct_words_count + 1)])
        A = numpy.vstack([x, numpy.ones(len(x))]).T
        m, c = numpy.linalg.lstsq(A, y)[0]

        #Calculate the regression line start and end,
        # and sort making the start be the one with the lower X value
        # (highcharts requires this)
        regline_start = (0, c)
        regline_end = (math.log(distinct_words_count),
                       math.log(distinct_words_count) * m + c)
        regression_line = {'start': regline_start, 'end': regline_end}

        return jsonify(status='success',
                       words=words,
                       frequencies=frequencies,
                       percentages=percentages,
                       acum_perc_list=acum_perc_list,
                       logarithms=logarithms,
                       regression_line=regression_line,
                       resulting_text=resulting_text,
                       input_text_char_count=input_text_char_count,
                       input_text_word_count=input_text_word_count,
                       output_text_word_count=word_count,
                       word_cloud=word_cloud,
                       sorteddict=sorteddict)
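# The slope m of the log-log fit above is the Zipf exponent (near -1 for
# natural language). A quick sanity check of the regression setup, using
# numpy.polyfit on toy counts that follow Zipf's law exactly:
import math
import numpy

freqs = [120, 60, 40, 30]  # toy counts: exactly 120 / rank
x = numpy.array([math.log(f) for f in freqs])
y = numpy.array([math.log(r) for r in range(1, len(freqs) + 1)])
m, c = numpy.polyfit(x, y, 1)
print(m, c)  # m is -1 and c is log(120): a perfect Zipf line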
#################################################################################################
## Perform query
#################################################################################################

## earlier/alternative queries kept for reference
#query = "quantum simulation phase transition"
#query = "quantum error correction shor"
#query = "three dimensional doppler cooling theory helium"
#query = "response particle periodic potential effective mass dynamics bose einstein condensate"
query = "anderson localization matter waves"
Ntop = 300

print('\n')
print('QUERY: ' + query)
print('TOP {} hits: '.format(Ntop) + query)

## find similarity of query to articles in corpus
vec_bow = dictionary.doc2bow(
    obo.removeStopwords(obo.stripNonAlphaNum(query), obo.stopwords))
vec_lsi = lsi[vec_bow]
sims = index[vec_lsi]  # perform a similarity query against the corpus
sims = sorted(enumerate(sims), key=lambda item: -item[1])

#################################################################################################
## Prepare data for plotting
#################################################################################################

## prepare data for scatter plot -- article index, score, date, titles
iArr = [x[0] for x in sims[:Ntop]]
sArr = [x[1] for x in sims[:Ntop]]
dArr = [year[i] for i in iArr]
labelsArr = [titles[i] for i in iArr]
datesArr = [dates[i] for i in iArr]

## prepare data for line plot -- data binned into quarters
import requests, obo

url = 'http://literature.org/authors/shelley-mary/frankenstein/chapter-01.html'

pagetext = requests.get(url)
HTML = pagetext.text
text = obo.stripTags(HTML).lower()  # convert to lower case
fullwordlist = obo.stripNonAlphaNum(text)  # only words, into list
wordlist = obo.removeStopwords(fullwordlist, obo.stopwords)  # remove common useless words
dictionary = obo.wordListToFreqDict(wordlist)  # add words and counts to dictionary
sorteddict = obo.sortFreqDict(dictionary)  # sort word list by frequency

if __name__ == '__main__':
    for s in sorteddict:
        print(str(s))
import requests, obo

url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'

pagetext = requests.get(url)
HTML = pagetext.text
wordlist = HTML.split()
text = obo.stripTags(HTML).lower()  # convert to lower case
#wordlist = text.split()
wordlist = obo.stripNonAlphaNum(text)  # RegEx and split done together
print(wordlist[0:150])
# html-to-kwic.py
import obo

# create dictionary of n-grams
n = 7
url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'

text = obo.webPageToText(url)
fullwordlist = ('# ' * (n // 2)).split()
fullwordlist += obo.stripNonAlphaNum(text)
fullwordlist += ('# ' * (n // 2)).split()
ngrams = obo.getNGrams(fullwordlist, n)
worddict = obo.nGramsToKWICDict(ngrams)

# output KWIC and wrap with html
target = 'black'
outstr = '<pre>'
if target in worddict:  # dict.has_key() is Python 2 only
    for k in worddict[target]:
        outstr += obo.prettyPrintKWIC(k)
        outstr += '<br />'
else:
    outstr += 'Keyword not found in source'
outstr += '</pre>'

obo.wrapStringInHTMLMac('html-to-kwic', url, outstr)
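# For reference, the two n-gram helpers this script calls follow the
# Programming Historian "Keywords in Context" lesson; a minimal sketch
# (assumed to match that lesson, not guaranteed to be this repo's copy):
def getNGrams(wordlist, n):
    # every window of n consecutive words
    return [wordlist[i:i + n] for i in range(len(wordlist) - (n - 1))]

def nGramsToKWICDict(ngrams):
    # index each n-gram by its middle word
    keyindex = len(ngrams[0]) // 2
    kwicdict = {}
    for k in ngrams:
        kwicdict.setdefault(k[keyindex], []).append(k)
    return kwicdict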
## pick one three-year window at a time (only the final assignment takes effect)
#years = range(1992, 1995)
#years = range(1995, 1998)
#years = range(1998, 2001)
#years = range(2001, 2004)
#years = range(2004, 2007)
#years = range(2007, 2010)
#years = range(2010, 2013)
years = range(2013, 2017)

scoreList = []
title = []
for year in years:
    for event, elem in ET.iterparse('arXiv-meta-{}.xml'.format(year)):
        if elem.tag == 'description':
            #queryList.append(elem.text)
            query = elem.text
            queryStripped = obo.removeStopwords(obo.stripNonAlphaNum(query),
                                                obo.stopwords)
            vec_bow = dictionary.doc2bow(queryStripped)
            vec_lsi = lsi[vec_bow]
            score = (vec_lsi[0][1], vec_lsi[1][1])
            scoreList.append(score)
        if elem.tag == 'title':
            title.append(elem.text)

x = [a[0] for a in scoreList]
y = [a[1] for a in scoreList]

import matplotlib.pyplot as plt
import mpld3

# note: 'axisbg' was removed in newer Matplotlib releases in favour of 'facecolor'
fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'), figsize=(18, 12))
scatter = ax.scatter(x, y, y, alpha=0.3, cmap=plt.cm.jet)
#html-to-freq.py
import urllib2, obo

url = 'http://www.oldbaileyonline.org/browse.jsp?id=t17800628-33&div=t17800628-33'

response = urllib2.urlopen(url)
html = response.read()
text = obo.stripTags(html).lower()
wordlist = obo.stripNonAlphaNum(text)
dictionary = obo.wordListToFreqDict(wordlist)
sorteddict = obo.sortFreqDict(dictionary)

for s in sorteddict:
    print(str(s))
def post(self, request):
    pprint.pprint(request.POST)
    pprint.pprint(request.FILES)

    #Language check
    if request.POST['language'] not in ['english', 'dutch']:
        return jsonify(status='error', message="Invalid language!")
    if request.POST['database'] not in connections:
        return jsonify(status='error', message="Invalid database!")

    #Input normalization
    if request.POST['upload_option'] == 'text_field':
        input_text = request.POST['upload_textarea']
    elif request.POST['upload_option'] == 'url':
        page_text = requests.get(request.POST['upload_url']).text
        soup = BeautifulSoup(page_text, "html.parser")
        input_text = soup.text
    elif request.POST['upload_option'] == 'file':
        input_text = UnicodeDammit(request.FILES['upload_file'].read()).unicode_markup
    elif request.POST['upload_option'] == 'news_comments':
        start_date_text = request.POST['news_comments_start_date']
        end_date_text = request.POST['news_comments_end_date']
        start_date = datetime.date(*[int(i) for i in start_date_text.split('-')])
        end_date = datetime.date(*[int(i) for i in end_date_text.split('-')])
        filters = {
            'date__gte': start_date,
            'date__lte': end_date,
            'text__isnull': False
        }
        input_text = ""
        if 'news' in request.POST['news_comments']:
            queryset = Newsitem.objects\
                .using(request.POST['database'])\
                .filter(**filters)\
                .select_related('text')
            for newsitem in queryset:
                input_text += "\n" + newsitem.text.text
        if 'comments' in request.POST['news_comments']:
            for comment in Comment.objects\
                    .using(request.POST['database'])\
                    .filter(**filters)\
                    .select_related('text'):
                input_text += "\n" + comment.text.text

    #Stemmer selection
    if request.POST['stemmer'] == 'no_stemmer':
        stemmer = None
    elif request.POST['stemmer'] == 'porter':
        if request.POST['language'] != 'english':
            return jsonify(status='error',
                           message="Invalid language for stemmer porter!")
        stemmer = PorterStemmer()
    elif request.POST['stemmer'] == 'snowball':
        stemmer = SnowballStemmer(request.POST['language'])
    else:
        return jsonify(status='error', message="Invalid stemmer!")

    #Lemmatizer selection
    if request.POST['lemmatizer'] == 'lemmatizer_off':
        lemmatizer = None
    elif request.POST['language'] == 'english':
        lemmatizer = lemmatizer_en
    else:
        lemmatizer = lemmatizer_nl

    #Stopwords selection
    if request.POST['stopwords'] == 'no_stopwords':
        stopwords = None
    elif request.POST['stopwords'] == 'our_stopwords':
        stopwords = obo.stopwords
    elif request.POST['stopwords'] == 'custom_stopwords':
        custom_stopword_text = UnicodeDammit(
            request.FILES.get('custom_stopword_file').read()).unicode_markup
        stopwords = obo.stripNonAlphaNum(custom_stopword_text)

    #Process the text
    input_text_word_count = 0
    resulting_text = ""
    final_wordlist = []
    for word_type, word in text_processor.parse_text(input_text):
        if word_type == "non-word":
            resulting_text += word
        else:
            input_text_word_count += 1
            processed_word = word
            if stemmer:
                processed_word = stemmer.stem(processed_word)
            if lemmatizer:
                processed_word = lemmatizer(processed_word)
            if not stopwords or processed_word not in stopwords:
                if request.POST['exclude_vowels'] == 'exclude_vowels_yes':
                    if request.POST['language'] == 'english':
                        regex = re_vowel_en
                    else:
                        regex = re_vowel_nl
                    processed_word = regex.sub("", processed_word)
                resulting_text += processed_word
                final_wordlist.append(processed_word)

    dictionary = obo.wordListToFreqDict(final_wordlist)
    sorteddict = obo.sortFreqDict(dictionary)

    ignore_results_amount = int(request.POST['ignore_results_amount'])
    if ignore_results_amount > 0:
        initial_index = ignore_results_amount
        ignored_words = [word for rank, word in sorteddict[:initial_index]]
        sorteddict = sorteddict[initial_index:]
        new_text = ""
        new_wordlist = []
        for word_type, word in text_processor.parse_text(resulting_text):
            if word_type == "non-word":
                new_text += word
            elif word not in ignored_words:
                new_text += word
                new_wordlist.append(word)
        resulting_text = new_text
        final_wordlist = new_wordlist
    else:
        initial_index = 0

    #Do the math!
    input_text_char_count = len(input_text)
    word_count = len(final_wordlist)
    distinct_words_count = len(sorteddict)
    words = []
    frequencies = []
    word_cloud = []
    for frequency, word in sorteddict:
        words.append(word)
        frequencies.append(frequency)
        word_cloud.append([word, frequency])
    acum_perc = Decimal(0)
    percentages = []
    acum_perc_list = []
    for freq in frequencies:
        perc = Decimal((freq * 100.0) / word_count)
        percentages.append(round(perc, 2))
        acum_perc += perc
        acum_perc_list.append(round(acum_perc, 2))
    logarithms = []
    for i in range(len(sorteddict)):
        logarithms.append((math.log(i + 1), math.log(frequencies[i])))

    #Calculate Linear regression
    #http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.lstsq.html#numpy.linalg.lstsq
    x = numpy.array([math.log(f) for f in frequencies])
    y = numpy.array([math.log(rank) for rank in range(1, distinct_words_count + 1)])
    A = numpy.vstack([x, numpy.ones(len(x))]).T
    m, c = numpy.linalg.lstsq(A, y)[0]

    #Calculate the regression line start and end,
    # and sort making the start be the one with the lower X value
    # (highcharts requires this)
    regline_start = (0, c)
    regline_end = (math.log(distinct_words_count),
                   math.log(distinct_words_count) * m + c)
    regression_line = {'start': regline_start, 'end': regline_end}

    return JsonResponse({
        'status': 'success',
        'words': words,
        'frequencies': frequencies,
        'percentages': percentages,
        'acum_perc_list': acum_perc_list,
        'logarithms': logarithms,
        'regression_line': regression_line,
        'resulting_text': resulting_text,
        'input_text_char_count': input_text_char_count,
        'input_text_word_count': input_text_word_count,
        'output_text_word_count': word_count,
        'word_cloud': word_cloud,
        'sorteddict': sorteddict
    })