def fetchNewsHeadlines(ek, companyName):
    """Mine the headlines for the given company."""
    # companyName = 'R:RDSa.L'
    df = ek.get_news_headlines(companyName + ' AND Language:LEN',
                               date_to="2018-05-22", count=100)
    return df
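# A minimal usage sketch for the function above (assumes an Eikon session;
# the app key is a placeholder and the RIC is illustrative):
import eikon as ek

ek.set_app_key('YOUR_APP_KEY')  # placeholder, not a real key
headlines = fetchNewsHeadlines(ek, 'R:RDSa.L')
print(headlines[['versionCreated', 'text']].head())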
def get_news(ric):
    # `from_date` and `today` are expected to be defined at module level
    headlines = ek.get_news_headlines(ric + ' IN CHINESE',
                                      date_from=from_date,
                                      date_to=today,
                                      count=100)
    return [{
        'time': a.strftime('%Y-%m-%dT%H:%M:%S'),
        'source': d,
        'headline': b,
        'storyid': c,
        'content': ek.get_news_story(c)
    } for a, b, c, d in zip(headlines['versionCreated'],
                            headlines['text'],
                            headlines['storyId'],
                            headlines['sourceCode'])]
def do_day(self, date, ric, do_clean):
    pathtodir = "{}\\{}\\{}\\{}\\{}".format(self.outputdir, date.year,
                                            date.month, date.day, ric)
    with eventlet.Timeout(10, False) as event:
        try:
            news = ek.get_news_headlines(
                ric + ' AND Language:LEN',
                date_from=date.strftime("%Y-%m-%d"),
                date_to=(date + datetime.timedelta(days=1)).strftime("%Y-%m-%d"),
                count=100)
            self.process_headlines(news, pathtodir, date, ric, do_clean)
        except Exception as e:
            self._handle_time(e, event)
def news():
    rqst = request.get_json()
    try:
        rst = eikon.get_news_headlines(query=rqst['query'],
                                       count=100,
                                       date_from=rqst['from'],
                                       date_to=rqst['to'],
                                       raw_output=True)
        # attach the full story body to each headline
        for idx, val in enumerate(rst['headlines']):
            val['news'] = eikon.get_news_story(val['storyId'])
        return jsonify(data=rst)
    except Exception as err:
        webapp.logger.warning(err)
        return abort(500)
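# A hedged client-side sketch for exercising the view above (assumes the
# Flask app serves it at /news on localhost:5000; route and port are guesses,
# and the query and dates are illustrative):
import requests

payload = {'query': 'R:AAPL.O', 'from': '2020-03-09', 'to': '2020-03-10'}
resp = requests.post('http://localhost:5000/news', json=payload)
print(resp.json()['data']['headlines'][:2])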
def news_occurance(ticker, regional_awards: list, international_awards: list,
                   date_to: str, date_from: str):
    """We stick to the Refinitiv convention that date_to precedes date_from."""

    def award_check(awards, text):
        return sum([award in text for award in awards])

    regional_occurance = 0
    international_occurance = 0
    i = 0
    n_df = 0
    while date_from != date_to:
        old_date_from = date_from
        news = ek.get_news_headlines(query=f"R:{ticker}",
                                     date_to=date_to,
                                     date_from=date_from,
                                     count=100)
        if len(news) == 0:
            break
        if i % 25 == 0:
            print(date_from)
        news = news.sort_values("versionCreated",
                                ascending=False).reset_index(drop=True)
        news["text"] = news["text"].apply(lambda x: x.lower())
        # advance date_from to the date of the newest headline in this batch
        date_from = str(news.loc[0, "versionCreated"].date())
        if old_date_from == date_from:
            if n_df >= 3:
                n_df = 0
                print(f"Stuck, breaking for ticker = {ticker}")
                break
            n_df += 1
        regional_occurance += sum(
            news["text"].apply(lambda x: award_check(regional_awards, x)))
        international_occurance += sum(
            news["text"].apply(lambda x: award_check(international_awards, x)))
        i += 1
    occurance = {
        "Ticker": ticker,
        "regional_occurance": regional_occurance,
        "international_occurance": international_occurance
    }
    return occurance
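# Illustrative call (the award keyword lists and dates are invented; per the
# docstring's convention, date_to here is the earlier bound):
counts = news_occurance('AAPL.O',
                        regional_awards=['regional award'],
                        international_awards=['international award', 'global award'],
                        date_to='2020-01-01',
                        date_from='2020-06-30')
print(counts)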
def downloadNews(undlName, date, savePath):
    if not checkFolderExist(savePath + formatDate(date)):
        createFolder(savePath, formatDate(date))
    # download data
    df = ek.get_news_headlines(
        "R:" + undlName + " and english",
        date_from=formatDate(moveDate(date, -1)) + "T16:00:00",
        date_to=formatDate(moveDate(date)) + "T16:00:00",
        count=100)
    # move timestamps back to HK time (UTC+8)
    df.index = moveDate(np.array(list(df.index)), 0, 8)
    df.versionCreated = moveDate(np.array(list(df.versionCreated)), 0, 8)
    # save data
    df.to_csv(savePath + formatDate(date) + "/" + undlName + "_headlines.csv")
def makeResponse(req):
    if req.get("result").get("action") != "fetchNews":
        return {}
    result = req.get("result")
    parameters = result.get("parameters")
    ticker = parameters.get("ticker")
    if ticker is None:
        return None
    tr.set_app_key('229dfa317f614c3c9cdfa2908c1ff66af4fdf1c6')
    # pass the ticker variable, not the literal string 'ticker'
    r = tr.get_news_headlines(ticker, date_from='2020-03-09',
                              date_to='2020-03-10', count=5)
    # r is a DataFrame; select the headline column rather than calling it
    news = ' '.join(r['text'])
    print(news)
    result = "The news on " + ticker + " are: " + news
    return {
        "text": result,
    }
def get_news(topic, source, start_date, end_date, n_articles):
    '''
    To search a company, append 'R:' in front of the RIC (ticker followed by
    a dot and an Exchange ID, e.g. TSLA.O).

    The following is a list of news sources and their respective codes from
    Mexico.

    mexican_sources = {
        'Mexico Ledger': 'MEXLED',
        'El Financiero': 'ELMEX',
        'El Economista': 'ELECOP',
        'La Jornada': 'LAMEX',
        'Mega News': 'MEGNEW',
        'Milenio': 'MILMEX',
        'El Nacional': 'ELNACI',
        'Expansion': 'EXPSPB',
        'Excelsior': 'EXCMEX',
        'Mural': 'MURMEX',
        'El Norte': 'ELNORT',
        'Estrategia': 'ESTMEX',
        'La I': 'LAIMEX',
        'Al Chile': 'ALCHIL',
        'Publimetro': 'PUBMEX',
        'Comunicae': 'COMMEX',
        'Diario de Yucatan': 'DIADEY',
        'Contexto Durango': 'CONDED',
    }
    '''
    # format query based on function inputs
    # search = '{topic} and NS:{source}'.format(topic=topic, source=source)
    search = topic
    # get headlines
    headlines = ek.get_news_headlines(query=search,
                                      date_from=start_date,
                                      date_to=end_date,
                                      count=n_articles)
    return headlines
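# A hedged sketch of the commented-out NS: source filter described in the
# docstring above, combining a topic with one of the Mexican source codes
# (the topic string and dates are invented; note the function currently
# ignores its `source` argument, so the filter is embedded in `topic`):
mx_headlines = get_news(topic='inflacion AND NS:ELECOP',
                        source='ELECOP',
                        start_date='2021-01-01',
                        end_date='2021-01-31',
                        n_articles=100)
print(mx_headlines['text'].head())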
# Create list of all dates
datelist = []
for t in range(0, numdays):
    datelist.append((today - datetime.timedelta(days=t)).date())

# Retrieve daily headlines
APIcode = 'R:AAPL.O AND Language:LEN'
frames = []
for date in datelist:
    # Voluntarily slow down requests (avoid "too many requests" errors from Eikon)
    for i in range(0, 1000):
        print('I love data science')
    # Get headlines
    df = ek.get_news_headlines(APIcode, date_from=date, date_to=date, count=100)
    frames.append(df)

# Create news dataframe
news = pd.concat(frames)
# Drop duplicates
news = news.drop_duplicates(subset='text', keep='last')

# Retrieve content of news
story = []
for storyId in news['storyId'].values:
    # Voluntarily slow down requests (avoid "too many requests" errors from Eikon)
    for i in range(0, 1000):
        print('I love data science')
    story.append(ek.get_news_story(storyId))
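# The busy-print loop above is an ad-hoc throttle; a sketch of the more
# conventional approach (a swapped-in technique, not from the original) is
# to pause between API calls:
import time

for date in datelist:
    time.sleep(0.5)  # back off between requests instead of busy-printing
    df = ek.get_news_headlines(APIcode, date_from=date, date_to=date, count=100)
    frames.append(df)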
import numpy
import pandas
import eikon

eikon.set_app_key('638fa3bbb90349d5a97dc60c1c0cc4b0b5646846')

eikon.get_news_headlines('R:LHAG.DE',
                         date_from='2018-01-01T00:00:00',
                         date_to='2018-12-13T18:00:00')

# df = eikon.get_timeseries("SPY",
#                           ['TR.IndexConstituentRIC',
#                            'TR.IndexConstituentName',
#                            'TR.IndexConstituentWeightPercent'],

data_grid, err = eikon.get_data(
    "SPY",
    ['TR.IndexConstituentRIC',
     {'TR.IndexConstituentWeightPercent': {'sort_dir': 'desc'}}],
    {'SDate': '2018-12-14'})
print(data_grid)

weight = data_grid.loc[:, "Weight percent"]
weight

import matplotlib.pyplot as plt

fig1, ax1 = plt.subplots()
ax1.pie(weight, shadow=True)
ax1.axis('equal')
plt.show()

NESN = eikon.get_timeseries(["NESN.S"],
                            start_date="2014-01-06",
                            end_date="2018-12-20",
                            interval="daily")
plt.plot(NESN.loc[:, "CLOSE"])
# tail of the rate-limit/error handler (_handle_time), excerpted:
            print('Will be back {}'.format(
                (curr_time + datetime.timedelta(seconds=number))
                .strftime('%Y-%m-%d %H:%M:%S')))
            event.cancel()
            time.sleep(number)
        else:
            print(ric, ' failed - ', e.message)
    else:
        print(ric, ' failed - ', e)

for ric in rics:
    for date in date_generated:
        with eventlet.Timeout(10, False) as event:
            try:
                news = ek.get_news_headlines(
                    ric + ' AND Language:LEN',
                    date_from=date.strftime("%Y-%m-%d"),
                    date_to=(date + datetime.timedelta(days=1)).strftime("%Y-%m-%d"),
                    count=100)
            except Exception as e:
                _handle_time(e, event)
                continue
        for i in range(len(news)):
            time.sleep(0.2)
            storyId = news.iat[i, 2]
            storyid = re.sub(':', '_', storyId)
            path = "{}\\{}\\{}\\{}\\{}.txt".format(date.year, date.month,
                                                   date.day, ric, storyid)
            os.makedirs(os.path.dirname(path), exist_ok=True)
            # HTML story
            with eventlet.Timeout(10, False) as event:
                try:
def get_news_and_sentiments(key_id, ticker_and_params, ticker, start_date, end_date):
    ek.set_app_key(key_id)

    # build a list of consecutive one-day [start, end] windows
    Date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    date_range = []
    for i in range(len(Date_range) - 1):
        start_date = pd.to_datetime(Date_range)[i].strftime("%Y-%m-%dT%H:%M:%S")
        end_date = pd.to_datetime(Date_range)[i + 1].strftime("%Y-%m-%dT%H:%M:%S")
        date_range.append([start_date, end_date])

    example_data = pd.DataFrame()
    for i in range(len(date_range)):
        example_data = example_data.append(
            ek.get_news_headlines(f'{ticker_and_params}',
                                  count=10,
                                  date_from=date_range[i][0],
                                  date_to=date_range[i][1]),
            ignore_index=False)
    news = example_data

    # fetch and strip the story body for each headline
    stories = []
    for i, storyId in enumerate(news['storyId']):
        try:
            html = ek.get_news_story(storyId)
            story = BeautifulSoup(html, 'html5lib').get_text()
            stories.append(story)
        except Exception:
            stories.append('')
    news['story'] = stories

    ## Sentiment Over Time
    sentiment = pd.DataFrame()
    sid = SentimentIntensityAnalyzer()
    for storyId in news['storyId']:
        row = news[news['storyId'] == storyId]
        scores = sid.polarity_scores(row['story'][0])
        sentiment = sentiment.append(
            pd.DataFrame(scores, index=[row['versionCreated'][0]]))
    sentiment.index = pd.DatetimeIndex(sentiment.index)
    sentiment.sort_index(inplace=True)
    sentiment_list = list(sentiment['compound'])
    news['sentiment'] = sentiment_list

    # group by day
    dates_normal = []
    for i in range(len(news['versionCreated'])):
        dates_normal.append(news['versionCreated'][i].strftime("%Y-%m-%d"))
    dates_normal_2 = pd.to_datetime(dates_normal)
    news['dates_normal'] = dates_normal_2

    # save to csv
    news.to_csv(f"{ticker}_news.csv")

    daily_sentiments_listed = news.groupby(
        pd.Grouper(key="dates_normal",
                   freq="D"))['sentiment'].apply(list).reset_index()

    # Daily Mood Index: DMI = log((1 + pos) / (1 + neg)) per day
    DMI = []
    for i in range(len(daily_sentiments_listed['sentiment'])):
        pos = 0
        neg = 0
        for j in range(len(daily_sentiments_listed['sentiment'][i])):
            try:
                if daily_sentiments_listed['sentiment'][i][j] > 0:
                    pos += 1
                elif daily_sentiments_listed['sentiment'][i][j] < 0:
                    neg += 1
            except Exception:
                pass
        DMI.append(np.log((1 + pos) / (1 + neg)))
    daily_sentiments_listed['DMI'] = DMI

    # Average sentiment
    Average_S = []
    for i in range(len(daily_sentiments_listed['sentiment'])):
        Average_S.append(np.mean(daily_sentiments_listed['sentiment'][i]))
    daily_sentiments_listed['Average_Sentiment'] = pd.DataFrame(Average_S).fillna(0)

    # save sentiments to csv
    daily_sentiments_listed.to_csv(f"{ticker}_sentiment.csv", index=False)

    return daily_sentiments_listed, news
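# A quick worked check of the Daily Mood Index used above (counts invented):
# with 6 positive and 2 negative stories on a day,
# DMI = log((1 + 6) / (1 + 2)) = log(7/3) ~= 0.847.
import numpy as np

print(np.log((1 + 6) / (1 + 2)))  # ~= 0.8473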
import eikon as ek

ek.set_app_key('ba4e52456ba64a87be3b82001782535f159b564e')
print(
    ek.get_news_headlines('R:IBM.N',
                          date_from='2019-09-27T09:00:00',
                          date_to='2019-09-27T10:00:00'))
# RIC: Reuters Instrument Code; look these up in the Data Item Browser.
rics = [
    'GE',         # General Electric stock
    'AAPL.O',     # Apple stock
    '.SPX',       # S&P 500 index
    '.VIX',       # VIX volatility index
    'EUR=',       # EUR/USD exchange rate
    'XAU=',       # gold price
    'DE10YT=RR'   # 10-year German government bond
]

# get time series data
ts = ek.get_timeseries('AAPL.O', start_date='2019-01-01')

# get news headlines; only about one day's worth is returned, anchored on date_to
nh = ek.get_news_headlines('R:AAPL.O', date_from='2018-01-01', date_to='2018-01-31')

# convert RICs into ISINs (International Securities Identification Numbers) and tickers
isins_tickers = ek.get_symbology(rics,
                                 from_symbol_type='RIC',
                                 to_symbol_type=['ISIN', 'ticker'])

# other symbol types (e.g. SEDOL: Stock Exchange Daily Official List) can also
# be converted to RICs or ISINs
sedols = ['B1YW440', '0673123', 'B02J639', 'B1XZS82', '0045614',
          '0053673', '0989529', '0216238', '0969703', '0263494']
transed_rics = ek.get_symbology(sedols,
                                from_symbol_type='SEDOL',
                                to_symbol_type=['RIC', 'ISIN'])

# historical data will be retrieved for the following stocks
symbols = ['US0378331005', 'US0231351067', 'US30303M1027', 'US4581401001']
rics = ek.get_symbology(symbols, from_symbol_type='ISIN', to_symbol_type='RIC')
rics = list(rics.RIC.values)
data = ek.get_timeseries(rics,                     # the RICs
                         fields='CLOSE',           # close field
                         start_date='2017-10-01',  # start date
import eikon as ek
import numpy
import pandas

ek.set_app_key('cd027055730e46e08ec6eeeaf0986d86824ec3ff')

ford_news = ek.get_news_headlines('FORD',
                                  date_from='2019-03-14T09:00:00',
                                  date_to='2019-03-15T18:00:00')
print(ford_news[1:])
print(len(ford_news))
ford_news.to_csv('ford1.csv', sep='\t', encoding='utf-8')

headlines = ek.get_news_headlines('EU AND POL', count=1)
story_id = headlines.iat[0, 2]
print(ek.get_news_story(story_id))
# In[81]:

Amazon_stock.iplot(kind="lines")

# In[52]:

Amazon_stock["CLOSE"].iplot()

# In[55]:

ek.get_news_headlines("R:AMZN.O",
                      date_from="2020-07-08T13:00:00",
                      date_to="2020-07-08T22:00:00",
                      count=5)

# In[65]:

Amazon_stock_1 = ek.get_timeseries("AMZN.O",
                                   fields="CLOSE",
                                   start_date="2019-01-01",
                                   end_date="2020-07-08")

# In[66]:

Amazon_stock_1

# In[95]:
import eikon as ek
from datetime import date
import re
import lxml.html
import pandas as pd
import win32com.client

outlook = win32com.client.Dispatch("Outlook.Application")
mail = outlook.CreateItem(0)

ek.set_app_id('<Eikon_token_id>')

# start_date, end_date = date(2017, 10, 22), date.today()
start_date, end_date = date(2017, 10, 1), date(2017, 12, 14)
q = "Product:IFRC AND Topic:ISU AND (\"PRICED\" OR \"DEAL\")"
headlines = ek.get_news_headlines(query=q,
                                  date_from=start_date,
                                  date_to=end_date,
                                  count=100)
headlines = headlines[headlines['storyId'].str.contains('nIFR')]
headlines.to_csv('test.csv')


def termsheet_to_dict(storyId):
    x = ek.get_news_story(storyId)
    story = lxml.html.document_fromstring(x).text_content()
    matches = dict(
        re.findall(
            pattern=r"\[(.*?)\]:\s?([A-Z,a-z,0-9,\-,\(,\),\+,/,\n,\r,\.,%,\&,>, ]+)",
            string=story))
    clean_matches = {key.strip(): item.strip() for key, item in matches.items()}
    return clean_matches


result = []
index = pd.DataFrame(headlines, columns=['storyId']).values.tolist()
for i, storyId in enumerate(index):
    x = termsheet_to_dict(storyId[0])
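# A hedged illustration of the key/value pattern termsheet_to_dict targets:
# lines of the form "[Field]: VALUE" in IFR termsheet stories (the sample
# text below is invented, not real story content):
sample = "[Issuer]: ACME CORP\n[Coupon]: 4.25%\n[Maturity]: 2027-12-14"
pairs = dict(
    re.findall(r"\[(.*?)\]:\s?([A-Z,a-z,0-9,\-,\(,\),\+,/,\n,\r,\.,%,\&,>, ]+)",
               sample))
print({k.strip(): v.strip() for k, v in pairs.items()})
# -> {'Issuer': 'ACME CORP', 'Coupon': '4.25%', 'Maturity': '2027-12-14'}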