def updateJSON_prices(sym):
    """Annotate every stored article with the daily price move of ``sym``.

    The daily series is fetched with ``rqstStockTSDataDaily(sym)`` and the
    articles are loaded with ``read_articles()``.  Each article gets two keys
    and the whole list is then persisted with ``write_articles()``:

    * ``"delta"`` -- close minus open of the quote returned by
      ``getTSDataDailyForceSuccessFuture``; ``0.0`` when no quote was found.
    * ``"const"`` -- ``True`` when the quote's date equals the article's
      (shifted) publication date, ``False`` otherwise, including when no
      quote was found.

    NOTE(review): another ``updateJSON_prices`` definition appears later in
    this file; if both live in the same module the later one shadows this one.

    Args:
        sym: Ticker symbol handed to ``rqstStockTSDataDaily``.
    """
    dailyDat = rqstStockTSDataDaily(sym)
    dat = read_articles()
    for article in dat:
        published = datetime.datetime.strptime(
            article["date_published"], "%Y-%m-%dT%H:%M:%SZ")
        # Shift back five hours, then round down to the nearest hour.
        # NOTE(review): presumably a fixed UTC -> US Eastern offset with no
        # DST handling -- confirm.
        t = (published - datetime.timedelta(hours=5)).replace(second=0, minute=0)

        # Expected to yield a (timestamp, quote) pair, or a falsy value when
        # no trading day could be found within the retry budget.
        result = getTSDataDailyForceSuccessFuture(dailyDat, t, 1000)
        if not result:
            article["delta"] = float(0)
            # Nothing to compare against; previously this path tried to
            # index the falsy result and raised TypeError.
            article["const"] = False
            continue

        quote_time, quote = result
        article["delta"] = quote["close"] - quote["open"]
        article["const"] = t.date() == quote_time.date()
    write_articles(dat)
def json_to_df(nkeywords):
    """Build an hourly keyword-count feature matrix and its label frame.

    Articles from ``read_articles()`` are sorted by publication time and
    bucketed by hour (publication time minus five hours, truncated to the
    hour).  For every hour between the first and the last article, the
    feature row counts how many of that hour's articles contain each of the
    first ``nkeywords`` entries of ``read_keywords()``.  Hours without any
    article repeat the previous hour's row.

    Args:
        nkeywords: Number of leading keywords used as feature columns.

    Returns:
        A pair ``(X, y)`` of DataFrames indexed by the hourly timeframe:
        ``X`` has one column per keyword, ``y`` has the single column
        ``'stock_change'`` filled from the values of ``read_hourlyStock()``.
        NOTE(review): this assumes the stored hourly values are in timeframe
        order and of the same length -- pandas raises otherwise.

    Raises:
        IndexError: If there are no articles.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def get_datetime(article):
        """Parse the article's ``date_published`` field."""
        return datetime.datetime.strptime(article['date_published'], timestamp_format)

    def get_dtnearest_hr(article):
        """Publication time shifted back five hours, truncated to the hour."""
        shifted = get_datetime(article) - datetime.timedelta(hours=5)
        return shifted.replace(second=0, minute=0)

    def get_timeframe(data):
        """Hourly index spanning the first to the last (sorted) article."""
        dt_start = get_dtnearest_hr(data[0])
        dt_end = get_dtnearest_hr(data[-1])
        diff = dt_end - dt_start
        # Both endpoints sit on whole hours, so floor division is exact.  The
        # count must be an int: non-integer ``periods`` is deprecated in
        # pandas.
        tot_hours = diff.days * 24 + diff.seconds // 3600 + 1
        return pd.date_range(start=str(dt_start), end=str(dt_end), periods=tot_hours)

    data = read_articles()
    keywords = read_keywords()[0:nkeywords]
    hourlyStock = read_hourlyStock()

    data = sorted(data, key=get_datetime)
    timeframe = get_timeframe(data)
    # Compute each article's hour bucket once instead of on every comparison.
    article_hours = [get_dtnearest_hr(article) for article in data]

    feat_mat = np.zeros((len(timeframe), len(keywords)), int)
    labels = np.array(list(hourlyStock.values()))

    # Merge the sorted articles into the hourly grid; ``j`` walks the articles.
    j = 0
    for i, hour in enumerate(timeframe):
        if j < len(data) and hour == article_hours[j]:
            while j < len(data) and hour == article_hours[j]:
                feat_row = [1 if keyword in data[j]['keywords'] else 0
                            for keyword in keywords]
                feat_mat[i] += feat_row
                j += 1
        elif i > 0:
            # No article in this hour: carry the previous hour's counts forward.
            feat_mat[i] = feat_mat[i - 1]

    X = pd.DataFrame(feat_mat, timeframe, keywords)
    y = pd.DataFrame(labels, timeframe, ['stock_change'])
    return X, y
import json
from load_articles import read_articles


def write_valuation(valuation_dictionary):
    """Dump ``valuation_dictionary`` as JSON into ``valuation.json``."""
    with open('valuation.json', 'w') as out_file:
        json.dump(valuation_dictionary, out_file)


def read_valuation():
    """Load and return the JSON content of ``valuation.json``."""
    with open('valuation.json') as in_file:
        return json.loads(in_file.read())


if __name__ == '__main__':
    # Sum each article's ``delta`` into every keyword attached to it, then
    # persist the per-keyword totals.
    articles = read_articles()
    valuation_dictionary = {}
    for entry in articles:
        delta = entry['delta']
        for word in entry['keywords']:
            valuation_dictionary[word] = valuation_dictionary.get(word, 0) + delta
    write_valuation(valuation_dictionary)
def updateJSON_prices(sym):
    """Refresh the hourly stock series and the per-article deltas for ``sym``.

    Steps:

    1. Download the full daily time series for ``sym``.
    2. Load the articles with ``read_articles()`` and sort them by
       publication time.
    3. For every hour between the first and last article, store the
       close-minus-open move of the matching (or next available) trading day
       and persist the mapping with ``write_hourlyStock()``; keys are the
       stringified hour timestamps.
    4. Set ``"delta"`` (same price move) and ``"const"`` (``True`` when the
       quote comes from the article's own calendar day) on every article and
       persist the sorted list with ``write_articles()``.

    Whenever no quote is found within the look-ahead budget the move is
    recorded as ``0.0`` and ``"const"`` as ``False``.

    Args:
        sym: Ticker symbol to query.

    Raises:
        ValueError: If the price request keeps failing after ten attempts.
        IndexError: If there are no articles.
    """
    # SECURITY(review): this credential is committed in source (the original
    # comment already says "redact in submissions").  Move it to configuration
    # or the environment and rotate the key.
    APIKey = "442ONKXSVHA79170"  # redact in submissions
    APIbase = "https://www.alphavantage.co/query"
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    one_day = datetime.timedelta(days=1)

    def rqstStockTSDataDaily(sym):
        """Fetch the "Time Series (Daily)" mapping, trying up to ten times."""
        params = {
            "function": "TIME_SERIES_DAILY",
            "symbol": sym,
            "outputsize": "full",
            "apikey": APIKey,
        }
        for _ in range(10):
            # The timeout keeps a stalled connection from hanging the update.
            r = requests.get(APIbase, params=params, timeout=30)
            if r:  # a Response is truthy only for a successful status
                return r.json()["Time Series (Daily)"]
        raise ValueError("Something unexpected happened.")

    def getTSDataDaily(stockDat, t):
        """Return the quote for ``t``'s calendar day, or ``False`` if absent."""
        try:
            e = stockDat[t.strftime("%Y-%m-%d")]
            return {
                "open": round(float(e["1. open"]), 2),
                "close": round(float(e["4. close"]), 2),
                "high": round(float(e["2. high"]), 2),
                "low": round(float(e["3. low"]), 2),
                "volume": int(e["5. volume"]),
            }
        except KeyError:
            return False

    def getTSDataDailyForceSuccessFuture(stockDat, t, maxFail):
        """Find the first day at or after ``t`` that has a quote.

        Returns ``(day, quote)``, or ``False`` when none of the ``maxFail``
        days starting at ``t`` has data.
        """
        for _ in range(maxFail):
            y = getTSDataDaily(stockDat, t)
            if y:
                return (t, y)
            t += one_day
        return False

    def get_datetime(article):
        """Parse the article's ``date_published`` field."""
        return datetime.datetime.strptime(article['date_published'], timestamp_format)

    def get_dtnearest_hr(article):
        """Publication time shifted back five hours, truncated to the hour."""
        # NOTE(review): presumably a fixed UTC -> US Eastern offset with no
        # DST handling -- confirm.
        shifted = get_datetime(article) - datetime.timedelta(hours=5)
        return shifted.replace(second=0, minute=0)

    def get_timeframe(data):
        """Hourly index spanning the first to the last (sorted) article."""
        dt_start = get_dtnearest_hr(data[0])
        dt_end = get_dtnearest_hr(data[-1])
        diff = dt_end - dt_start
        # Both endpoints sit on whole hours, so floor division is exact.  The
        # count must be an int: non-integer ``periods`` is deprecated in
        # pandas.
        tot_hours = diff.days * 24 + diff.seconds // 3600 + 1
        return pd.date_range(start=str(dt_start), end=str(dt_end), periods=tot_hours)

    def price_change(result):
        """Close minus open of a lookup result; ``0.0`` when nothing was found."""
        if not result:
            return float(0)
        return result[1]["close"] - result[1]["open"]

    dailyDat = rqstStockTSDataDaily(sym)
    dat = sorted(read_articles(), key=get_datetime)

    # Updates hourlyStock
    hourlyStock = {}
    for t in get_timeframe(dat):
        result = getTSDataDailyForceSuccessFuture(dailyDat, t, 1000)
        # A failed lookup used to crash here; it is now handled the same way
        # as in the article loop below.
        hourlyStock[str(t)] = price_change(result)
    write_hourlyStock(hourlyStock)

    # Updates the articles with delta values
    for article in dat:
        t = get_dtnearest_hr(article)
        result = getTSDataDailyForceSuccessFuture(dailyDat, t, 1000)
        article["delta"] = price_change(result)
        # Without a quote there is no date to compare against.
        article["const"] = bool(result) and t.date() == result[0].date()
    write_articles(dat)