def count_word_occurance():
    """Render the word-count page for the current session.

    The ``session_id`` query argument is read from the request and passed
    back to the template as a ``?session_id=...`` query string.

    POST: reads the ``word`` form field, runs it through
    ``indeed_scrape.stemmer.stem``, and counts the entries of the
    ``summary_stem`` column (from ``load_csv(session_id)``) whose tokens
    contain that stem. The template receives ``count``, ``word``,
    ``number`` (row count of the frame) and ``percent`` (share of matching
    rows, as a string rounded to one decimal place).

    Any other method: renders ``word_count.html`` with only the session
    query string.

    Returns:
        The result of ``render_template('word_count.html', ...)``.
    """
    session_id = request.args.get("session_id")
    session_string = "?session_id=%s" % session_id

    if request.method == 'POST':
        word = request.form['word']
        word_stem = indeed_scrape.stemmer.stem(word)

        # The return value is not used here; the call is kept in case the
        # session lookup has side effects -- TODO confirm and drop if not.
        get_sess(session_id)
        df = load_csv(session_id)

        # Count documents (not total occurrences) that contain the stem.
        count = 0
        for doc in df['summary_stem']:
            tokens = np.array(indeed_scrape.toker(doc))
            if np.any(tokens == word_stem):
                count += 1

        number = df.shape[0]
        if number:
            # Scale to a percentage *before* rounding; rounding the raw
            # fraction to one decimal would only yield multiples of 10.
            percent = np.round(100.0 * count / number, decimals=1)
        else:
            # Empty frame: avoid dividing by zero.
            percent = 0.0
        percent = str(percent)

        return render_template('word_count.html',
                               count=count,
                               session_id=session_string,
                               word=word,
                               number=number,
                               percent=percent)

    # GET (and any other routed method) just shows the empty form page, so
    # the view never falls through and returns None.
    return render_template('word_count.html', session_id=session_string)
def normalize_titles(self, titles):
    """Return a cleaned-up, lower-cased copy of each title.

    For every title, in order:
      1. ``/`` and ``-`` are replaced with spaces.
      2. Leading and trailing whitespace is removed and runs of two or
         more whitespace characters are collapsed to a single space.
      3. The text is tokenized with ``indeed_scrape.toker``, passed through
         ``Indeed.len_tester``, and re-joined with single spaces.
      4. The standalone words ``amp`` and ``and`` (surrounded by
         whitespace) are replaced by a single space.
      5. The result is lower-cased.

    Args:
        titles: iterable of title strings.

    Returns:
        list of normalized title strings, one per input title.
    """
    ind = indeed_scrape.Indeed("kw")

    out = []
    for title in titles:
        cleaned = title.replace("/", " ").replace("-", " ")
        cleaned = re.sub(r"^\s+", "", cleaned)
        # Anchor goes last: the previous "$\s+" pattern could only ever
        # match a single trailing newline, never trailing spaces.
        cleaned = re.sub(r"\s+$", "", cleaned)
        cleaned = re.sub(r"\s{2,}", " ", cleaned)

        tokens = indeed_scrape.toker(cleaned)
        tokens = ind.len_tester(tokens)
        joined = " ".join(tokens)

        # NOTE(review): these removals run before lower-casing, so "And" /
        # "AMP" are left in place -- confirm whether that is intended.
        joined = re.sub(r'\s+amp\s+', ' ', joined)
        joined = re.sub(r'\s+and\s+', ' ', joined)

        out.append(joined.lower())

    return out