def output_lyrics():
    """Generate a song from the posted seed text and return it as JSON.

    Reads the ``"initial_string"`` field from the request's JSON body,
    passes it through ``clean``, feeds the result to ``generate_text``
    together with the module-level ``model``, and post-processes the
    generated text with ``clean_song``.

    Returns:
        The ``jsonify`` response for a dict with two keys: ``"id"`` (a
        fresh random UUID4 rendered as a string) and ``"song"`` (the
        cleaned generated text).
    """
    payload = request.get_json()
    seed_text = clean(payload["initial_string"])
    lyrics = clean_song(generate_text(model, seed_text))
    return jsonify({"id": str(uuid.uuid4()), "song": lyrics})
def get_token_list(self):
    """Return the cleaned tokens for ``self.nodes``, building them on demand.

    The list is cached in ``self.token_list``. Because the cache check is
    a truthiness test, an empty result is rebuilt on every call.

    The first node (index 0) is always skipped, as are nodes whose
    ``get_form()`` is ``None`` and forms for which ``tc.clean`` returns a
    falsy value.

    Returns:
        The list stored in ``self.token_list``.
    """
    if not self.token_list:
        # Build into a local list and publish it in one assignment, so an
        # exception raised half-way through cannot leave a partially
        # filled (and therefore truthy) cache behind.
        tokens = []
        for position, word in enumerate(self.nodes):
            if position == 0:
                # NOTE(review): node 0 is never tokenised -- presumably a
                # root/placeholder node; confirm against the class.
                continue
            form = word.get_form()
            if form is None:
                continue
            token = tc.clean(form)
            if token:
                tokens.append(token)
        self.token_list = tokens
    return self.token_list
import pandas as pd
from text_clean import clean

# Export labelled news rows from MySQL into data/train_set.csv.
#
# NOTE(review): `pymysql` is used below but its import is not visible in
# this part of the file -- confirm that `import pymysql` exists above.
# NOTE(review): host and credentials are hard-coded; consider loading them
# from the environment or a config file kept out of version control.
conn = pymysql.connect(host='39.97.176.70', port=3306, user='******',
                       password='******', database='test_timeline')

# Plain string: the query has no placeholders, so no f-string is needed.
sql = """ SELECT news_text, news_attitude FROM news_info WHERE news_attitude is not NULL; """

# Release the cursor and the connection even if the query fails.
try:
    cur = conn.cursor()
    try:
        cur.execute(sql)
        rows = list(cur.fetchall())
    finally:
        cur.close()
finally:
    conn.close()

# The `news_attitude` column is exposed as `label` in the training set.
df = pd.DataFrame(rows, columns=['news_text', 'label'])

# `clean` yields an iterable of strings, re-joined here with single spaces.
df['news_text'] = df['news_text'].apply(lambda x: ' '.join(clean(x)))

# Keep only rows whose cleaned text is longer than 61 characters.
df = df[df['news_text'].apply(lambda x: len(x) > 61)]

# reset_index returns a new frame; the original discarded the result.
df = df.reset_index(drop=True)
df.to_csv('data/train_set.csv', index=False)
def _published_day(doc_date):
    """Return midnight of the day encoded in the timestamp string *doc_date*."""
    # Cut a "+HH:MM" offset explicitly. Slicing with str.find('+') silently
    # dropped the last character whenever no '+' was present.
    stamp = doc_date.partition('+')[0]
    if stamp.endswith('Z'):
        stamp = stamp[:-1]
    stamp = stamp.replace('T', '-').replace(':', '-')
    parsed = datetime.strptime(stamp, '%Y-%m-%d-%H-%M-%S')
    return parsed.replace(hour=0, minute=0, second=0)


def draw_plot(res, start_date_in, end_date_in, stk):
    """Compute, per business day, the share of news predicted positive.

    Every day from ``start_date_in`` to ``end_date_in`` (inclusive) that
    ``np.is_busday`` accepts, given ``KL_holidays``, gets one slot. Each
    document in ``res['hits']['hits']`` whose ``published_date`` falls on
    such a day is cleaned, scored with ``stock_pred.run`` and counted; it
    also counts as positive when the prediction for ``stk`` equals 1.

    Args:
        res: Search result; documents are read from ``res['hits']['hits']``
            and each needs ``_source.published_date`` and
            ``_source.content``.
        start_date_in: First day of the range (inclusive).
        end_date_in: Last day of the range (inclusive).
        stk: Key selecting the stock inside the prediction output.

    Returns:
        A dict mapping ``matplotlib.dates.date2num`` values of the business
        days to ``positive / total`` (``0.0`` for days without news), or
        ``0`` if anything goes wrong.
    """
    try:
        one_day = timedelta(days=1)
        news_date = []
        total_news = []
        pos_news = []
        try:
            day = start_date_in
            while day <= end_date_in:
                if np.is_busday(day, holidays=KL_holidays):
                    news_date.append(day)
                    total_news.append(0)
                    pos_news.append(0)
                day = day + one_day

            for doc in res['hits']['hits']:
                published = _published_day(doc['_source']['published_date'])
                if published not in news_date:
                    continue
                # Needs to change if news published on public holidays
                # should also be considered.
                if not np.is_busday(published, holidays=KL_holidays):
                    continue
                slot = news_date.index(published)
                total_news[slot] += 1

                # Dropping non-ASCII characters is meant to limit the news
                # to English only.
                doc_content = doc['_source']['content'].encode(
                    'ascii', 'ignore')
                # Per the original author, clean() writes into the
                # 'elastic_grabbed_news' folder.
                cleaned_news = clean(news_str=doc_content, configs=config_obj)
                # TODO: decide what to do with the rest of the output.
                json_stock_prediction = stock_pred.run(
                    cleaned_news.encode('ascii', 'ignore'), 0.5)
                if json_stock_prediction['prediction'][stk]['prediction'] == 1:
                    pos_news[slot] += 1

            percentage = [
                float(positive) / float(total) if total != 0 else 0.0
                for positive, total in zip(pos_news, total_news)
            ]
            dates = matplotlib.dates.date2num(news_date)
            return dict(zip(dates, percentage))
        except Exception as e:
            PrintException(e, exitStatus=True)
            # No result exists on this path. The original reached the same
            # 0 via an unbound-variable error caught by the outer handler.
            return 0
    except BaseException:
        # Deliberately as broad as the original bare `except:`.
        # NOTE(review): PrintException is called with exitStatus=True; if
        # it raises SystemExit, this handler turns that into a 0 return.
        # Confirm that is intended before narrowing it.
        return 0
def get_token(self):
    """Return this object's form after passing it through ``tc.clean``."""
    form = self.get_form()
    return tc.clean(form)