Beispiel #1
0
def output_lyrics():
    """Generate a song from the seed text posted in the JSON request body.

    The body must contain an ``"initial_string"`` entry. That seed is
    passed through ``clean``, fed to ``generate_text`` together with the
    module-level ``model``, and the generated text is post-processed with
    ``clean_song``.

    Returns:
        The ``jsonify`` response for a dict with a freshly generated UUID
        string under ``"id"`` and the cleaned song under ``"song"``.
    """
    payload = request.get_json()
    seed_text = clean(payload["initial_string"])
    lyrics = clean_song(generate_text(model, seed_text))
    return jsonify({"id": str(uuid.uuid4()), "song": lyrics})
    def get_token_list(self):
        """Return the cleaned tokens of ``self.nodes``, building them on demand.

        The list is stored on ``self.token_list`` and reused on later calls.
        The node at index 0 is always skipped, as are nodes whose
        ``get_form()`` is ``None`` and forms that ``tc.clean`` reduces to a
        falsy value.

        Returns:
            list: The cleaned tokens, in node order.
        """
        # An empty list is falsy too, so when no token survives cleaning the
        # list is rebuilt on every call (same as the original behaviour).
        if not self.token_list:
            self.token_list = []
            for i, word in enumerate(self.nodes):
                if i == 0:
                    # First node never contributes a token
                    # (presumably a root placeholder - TODO confirm).
                    continue
                form = word.get_form()
                if form is None:
                    continue
                token = tc.clean(form)
                if token:
                    self.token_list.append(token)
        return self.token_list
Beispiel #3
0
import pandas as pd
from text_clean import clean

# Export labelled news texts from the `news_info` table to a training CSV.
#
# NOTE(review): `pymysql` is used below but is not among the import lines
# visible at the top of this script - confirm it is imported.
# NOTE(review): host and credentials are hard-coded; consider loading them
# from configuration or the environment.
conn = pymysql.connect(host='39.97.176.70',
                       port=3306,
                       user='******',
                       password='******',
                       database='test_timeline')

# Only rows that already carry an attitude label are exported.
sql = """
        SELECT news_text, news_attitude
    FROM
        news_info
    WHERE news_attitude is not NULL;
    """

# Close the cursor and the connection even when the query fails.
try:
    cur = conn.cursor()
    try:
        cur.execute(sql)
        rows = list(cur.fetchall())
    finally:
        cur.close()
finally:
    conn.close()

# `news_attitude` is exposed as the `label` column of the training set.
df = pd.DataFrame(rows, columns=['news_text', 'label'])
# `clean` yields an iterable of strings; store them as one space-separated text.
df['news_text'] = df['news_text'].apply(lambda x: ' '.join(clean(x)))
# Keep only texts longer than 61 characters after cleaning.
df = df[df['news_text'].str.len() > 61]
# reset_index returns a new frame, so the result has to be assigned;
# drop=True keeps the old index from becoming an extra column.
df = df.reset_index(drop=True)

df.to_csv('data/train_set.csv', index=False)
def draw_plot(res, start_date_in, end_date_in, stk):
    """Compute the share of positively predicted news per business day.

    Nothing is drawn here; the function only builds the data series.

    Args:
        res: Search response. ``res['hits']['hits']`` is iterated, and each
            hit must provide ``['_source']['published_date']`` and
            ``['_source']['content']``.
        start_date_in: First day of the range (inclusive).
        end_date_in: Last day of the range (inclusive).
        stk: Key selecting the stock entry in the predictor output
            (``result['prediction'][stk]['prediction']``).

    Returns:
        dict: Maps the matplotlib date number of every business day in the
        range (per ``np.is_busday`` with ``KL_holidays``) to the fraction
        of that day's news whose prediction equals 1, or 0.0 for days
        without news. Returns ``0`` if anything fails.
    """
    try:
        one_day = timedelta(days=1)

        try:
            # Collect the business days of the requested range in order.
            news_date = []
            current_day = start_date_in
            while current_day <= end_date_in:
                if np.is_busday(current_day, holidays=KL_holidays):
                    news_date.append(current_day)
                current_day = current_day + one_day

            total_news = [0] * len(news_date)
            pos_news = [0] * len(news_date)
            # Resolve a day to its slot with one dict lookup instead of
            # scanning the list for every hit.
            slot_of_day = {day: idx for idx, day in enumerate(news_date)}

            for doc in res['hits']['hits']:
                source = doc['_source']
                doc_date = source['published_date']
                # Cut at the first '+'. When there is none, find() returns
                # -1 and the slice drops the last character instead.
                doc_date_refined = doc_date[:doc_date.find('+')]
                doc_date_refined = doc_date_refined.replace('T', '-')
                doc_date_refined = doc_date_refined.replace(':', '-')
                doc_date_refined = datetime.strptime(
                    doc_date_refined,
                    '%Y-%m-%d-%H-%M-%S')  # convert to datetime
                # Only the calendar day matters for bucketing.
                doc_date_refined = doc_date_refined.replace(hour=0,
                                                            minute=0,
                                                            second=0)

                slot = slot_of_day.get(doc_date_refined)
                if slot is None:
                    continue
                # Needs to be changed if news published on public holidays
                # should also be considered.
                if not np.is_busday(doc_date_refined, holidays=KL_holidays):
                    continue

                total_news[slot] += 1
                doc_content = source['content']
                doc_content = doc_content.encode(
                    'ascii', 'ignore')  # limits news to english only
                cleaned_news = clean(
                    news_str=doc_content, configs=config_obj
                )  # writes in 'elastic_grabbed_news' folder
                cleaned_news = cleaned_news.encode('ascii', 'ignore')
                # 0.5 is presumably a decision threshold - TODO confirm.
                json_stock_prediction = stock_pred.run(
                    cleaned_news,
                    0.5)  # TODO: decide what to do with output
                if json_stock_prediction['prediction'][stk]['prediction'] == 1:
                    pos_news[slot] += 1

            # Fraction of positive news per day; days without news get 0.0.
            percentage = [
                float(pos) / float(total) if total != 0 else 0.0
                for pos, total in zip(pos_news, total_news)
            ]

            dates = matplotlib.dates.date2num(news_date)
            ee = dict(zip(dates, percentage))
        except Exception as e:
            PrintException(e, exitStatus=True)

        # If the handler above returned normally, `ee` is unbound and the
        # resulting error is turned into the 0 fallback below.
        return ee

    # NOTE(review): equivalent to the original bare `except:`. It also
    # swallows SystemExit and KeyboardInterrupt; if PrintException exits
    # through SystemExit, that is converted into a 0 return here. Confirm
    # this is intended before narrowing the handler.
    except BaseException:
        return 0
 def get_token(self):
     """Return this object's form after passing it through ``tc.clean``."""
     raw_form = self.get_form()
     return tc.clean(raw_form)