def genData_Test():
    """Build the test dataset CSV (test_data/dat_0.csv).

    Walks every date from 2018-04-12 through today; for each kept day it
    reads that day's headline file (test_data/news/<date>.csv, rows of
    [symbol, headline]), scores the headline with the sentiment service
    (with two fallbacks), joins it with the day's stock quote, and appends
    one row per headline to the output CSV.

    Depends on module-level helpers: getStockData_Test, filterString,
    filterByEnchant and the `sentiment` client.
    """
    # Historical pickle-based bookkeeping, kept for reference:
    # dataHistFile = open('dat.pkl', 'r+b')
    # dataHist = pickle.load(dataHistFile)
    # dataFileNumber = dataHist['data_file_number'] + 1
    # dataFile = open('data/dat_' + dataFileNumber + '.csv', 'a')
    # `with` guarantees the output handle is closed (original leaked it).
    with open('test_data/dat_0.csv', 'a') as dataFile:
        csvWriter = csv.writer(dataFile)
        # date = dateHist['last_updated']
        date = datetime.datetime(2018, 4, 12).date()
        endDate = datetime.date.today()
        while date <= endDate:
            print('Checking data for ' + date.strftime('%Y-%m-%d'))
            day = date.weekday()
            # NOTE(review): weekday() 4/5 are Friday/Saturday -- skipping
            # those rather than Saturday/Sunday looks odd for stock data;
            # confirm it matches the news-file availability convention.
            if day == 4 or day == 5:
                date += datetime.timedelta(days=1)
                continue
            fname = date.strftime('%Y-%m-%d')
            print("File Name: " + fname)
            # Close the per-day news file too (original leaked the handle).
            with open('test_data/news/' + fname + '.csv') as news_file:
                for row in csv.reader(news_file):
                    stockdata = getStockData_Test(row[0], date)
                    if stockdata == -1:  # no quote for this symbol/date
                        continue
                    print("Origin String: %s" % row[1])
                    filtered_string = filterString(row[1])
                    # Drop non-ASCII bytes before hitting the sentiment API.
                    filtered_string = filtered_string.decode("ascii", errors="ignore").encode()
                    print("Filtered String: %s" % filtered_string)
                    # Progressive fallback: filtered text, then an
                    # enchant-spellchecked version, then just the symbol.
                    try:
                        sentdata = sentiment.analyzeText(filtered_string)
                    except Exception:  # was a bare except: -- keep it narrow
                        enchant_string = filterByEnchant(filtered_string)
                        print("Enchanted String: %s" % enchant_string)
                        try:
                            sentdata = sentiment.analyzeText(enchant_string)
                        except Exception:
                            sentdata = sentiment.analyzeText(row[0])
                    data = []
                    data.extend((row[0], date.timetuple().tm_yday))
                    data.extend((sentdata.score, sentdata.magnitude))
                    # stockdata indices: 0 adj close, 1 close, 3 high,
                    # 4 low, 5 open, 6 volume
                    data.extend((stockdata[5], stockdata[1], stockdata[0],
                                 stockdata[3], stockdata[4], stockdata[6]))
                    # Output row: symbol, day-of-year, sentiment score,
                    # sentiment magnitude, open, close, adj close, high,
                    # low, volume
                    csvWriter.writerow(data)
            date += datetime.timedelta(days=1)
def get_user_response():
    """Render the recipe page for the user's text query.

    Reads the `text` query parameter from the Flask request, runs it
    through the sentiment/ID pipeline, queries the recipe API, and renders
    recipes.html with the name, image and time fields.
    """
    print("rendering response\n")
    # Renamed from `input`: the original shadowed the builtin.
    user_text = request.args.get('text')
    print("outputting")
    out = queryAPIForRecipe(sentiment.getIDs(sentiment.analyzeText(user_text), 0)).split(" - ")
    # NOTE(review): assumes the API reply always has at least three
    # " - "-separated fields; an IndexError escapes otherwise -- confirm.
    return render_template("recipes.html", data=out[0], img=out[1], time=out[2])
def genData():
    """Build the training dataset CSV (data/dat_9.csv).

    Iterates dates from 2014-01-27 to 2017-01-01 (exclusive), maps each
    news row's first column to a ticker via dict.csv, scores the joined
    headline text, and appends [name, day-of-year, pos, neg, stock...]
    rows to the output file.  Depends on getStockData and `sentiment`.
    """
    # Historical pickle-based bookkeeping, kept for reference:
    # dataHistFile = open('dat.pkl', 'r+b')
    # dataHist = pickle.load(dataHistFile)
    # dataFileNumber = dataHist['data_file_number'] + 1
    # dataFile = open('data/dat_' + dataFileNumber + '.csv', 'a')
    # date = dateHist['last_updated']
    # endDate = datetime.date.today()
    date = datetime.datetime.strptime('27012014', "%d%m%Y").date()
    endDate = datetime.datetime.strptime('01012017', "%d%m%Y").date()
    # Name -> ticker lookup; dict.csv rows are presumably [ticker, name]
    # (values feed getStockData) -- confirm against the file.
    # `with` closes the handle (the original never closed dict.csv).
    with open('dict.csv', 'r') as dict_file:
        dic = {rw[1]: rw[0] for rw in csv.reader(dict_file)}
    with open('data/dat_9.csv', 'a') as dataFile:
        csvWriter = csv.writer(dataFile)
        while date < endDate:
            day = date.weekday()
            # NOTE(review): 4/5 = Friday/Saturday -- confirm intended.
            if day == 4 or day == 5:
                date += datetime.timedelta(days=1)
                continue
            fname = date.strftime('%Y-%m-%d')
            with open('data/news1/' + fname + '.csv') as news_file:
                for row in csv.reader(news_file):
                    # NOTE(review): raises KeyError when row[0] is missing
                    # from dict.csv -- same behavior as the original.
                    stockdata = getStockData(dic[row[0]], date)
                    if stockdata == -1:
                        continue
                    # Concatenate all headline columns into one text blob.
                    news = ''.join(row[1:])
                    pos, neg = sentiment.analyzeText(news)
                    data = []
                    data.extend((row[0], date.timetuple().tm_yday))
                    data.append(pos)
                    data.append(neg)
                    data.extend(stockdata)
                    csvWriter.writerow(data)
            date += datetime.timedelta(days=1)
def genData():
    """Incremental dataset builder driven by the dat.pkl history file.

    dat.pkl holds {'data_file_number': int, 'last_updated': date}.  This
    routine appends a row per news item for every kept day since
    last_updated to a fresh data/dat_<n>.csv, then writes the updated
    history back into dat.pkl so the next run resumes from today.
    """
    dataHistFile = open('dat.pkl', 'r+b')
    try:
        dataHist = pickle.load(dataHistFile)
        dataFileNumber = dataHist['data_file_number'] + 1
        # BUG FIX: dataFileNumber is an int; the original concatenated it
        # straight into the path and raised TypeError -- wrap in str().
        with open('data/dat_' + str(dataFileNumber) + '.csv', 'a') as dataFile:
            csvWriter = csv.writer(dataFile)
            # BUG FIX: the original read `dateHist['last_updated']`, but the
            # variable is named dataHist -- that line was a NameError.
            date = dataHist['last_updated']
            endDate = datetime.date.today()
            while date < endDate:
                print('Checking data for ' + date.strftime('%Y-%m-%d'))
                day = date.weekday()
                # NOTE(review): 4/5 = Friday/Saturday -- confirm intended.
                if day == 4 or day == 5:
                    date += datetime.timedelta(days=1)
                    continue
                fname = date.strftime('%Y-%m-%d')
                with open('data/news/' + fname + '.csv') as news_file:
                    for row in csv.reader(news_file):
                        stockdata = getStockData(row[0], date)
                        if stockdata == -1:  # no quote: skip this headline
                            continue
                        sentdata = sentiment.analyzeText(row[1])
                        data = []
                        data.extend((row[0], date.timetuple().tm_yday))
                        data.extend((sentdata.score, sentdata.magnitude))
                        data.extend(stockdata)
                        csvWriter.writerow(data)
                date += datetime.timedelta(days=1)
        # Persist the new state so the next run resumes where we stopped.
        dataHist['data_file_number'] = dataFileNumber
        dataHist['last_updated'] = endDate
        dataHistFile.seek(0)
        pickle.dump(dataHist, dataHistFile, protocol=pickle.HIGHEST_PROTOCOL)
        # BUG FIX: truncate so a shorter pickle does not leave stale bytes
        # from the previous, longer payload at the end of the file.
        dataHistFile.truncate()
    finally:
        dataHistFile.close()
def genData():
    """Build the dataset CSV data/dat_1.csv for 2018-01-02 through today.

    For each kept day, reads data/news/<date>.csv ([symbol, headline]
    rows), scores the headline, joins it with the day's stock quote and
    appends one row per headline.  Depends on getStockData and `sentiment`.
    """
    # Historical pickle-based bookkeeping, kept for reference:
    # dataHistFile = open('dat.pkl', 'r+b')
    # dataHist = pickle.load(dataHistFile)
    # dataFileNumber = dataHist['data_file_number'] + 1
    # dataFile = open('data/dat_' + dataFileNumber + '.csv', 'a')
    # `with` guarantees the output handle is closed (original leaked it).
    with open('data/dat_1.csv', 'a') as dataFile:
        csvWriter = csv.writer(dataFile)
        # date = dateHist['last_updated']
        # Same values as the original's Py2-only `(2018,01,02)` literals.
        date = datetime.datetime(2018, 1, 2).date()
        endDate = datetime.date.today()
        while date < endDate:
            print('Checking data for ' + date.strftime('%Y-%m-%d'))
            day = date.weekday()
            # NOTE(review): 4/5 = Friday/Saturday -- confirm intended.
            if day == 4 or day == 5:
                date += datetime.timedelta(days=1)
                continue
            fname = date.strftime('%Y-%m-%d')
            with open('data/news/' + fname + '.csv') as news_file:
                for row in csv.reader(news_file):
                    stockdata = getStockData(row[0], date)
                    if stockdata == -1:  # no quote: skip this headline
                        continue
                    sentdata = sentiment.analyzeText(row[1])
                    data = []
                    data.extend((row[0], date.timetuple().tm_yday))
                    data.extend((sentdata.score, sentdata.magnitude))
                    # stockdata indices: 0 adj close, 1 close, 3 high,
                    # 4 low, 5 open, 6 volume
                    data.extend((stockdata[5], stockdata[1], stockdata[0],
                                 stockdata[3], stockdata[4], stockdata[6]))
                    csvWriter.writerow(data)
            date += datetime.timedelta(days=1)
def prediction(dt):
    """Predict next-day values for every stock in dt's news file.

    Restores the saved linear model (model/lin-model.ckpt), builds one
    feature row per headline in test_data/news/<dt>.csv, runs the restored
    'operator' tensor, rescales each prediction by the norm of its raw
    feature slice, and writes the result table to
    test_data/line-result/result-<dt+1>.csv.  For past dates it also
    collects the actual next-day quotes for comparison.
    """
    tf.reset_default_graph()
    session = tf.Session()
    model_path = os.getcwd() + '/model/lin-model.ckpt'
    meta_path = os.getcwd() + '/model/lin-model.ckpt.meta'
    saver = tf.train.import_meta_graph(meta_path)
    saver.restore(session, model_path)
    graph = tf.get_default_graph()
    # Placeholders / output tensor recovered from the checkpointed graph.
    stock_data = graph.get_tensor_by_name('stock_data:0')
    opening_price = graph.get_tensor_by_name('opening_price:0')
    stock_price = graph.get_tensor_by_name('stock_price:0')
    # NOTE(review): W is never used below; kept as-is because creating the
    # variable may interact with the restored graph -- confirm and drop.
    W = tf.get_variable("Weight", [3, 1], dtype=np.float32)
    y = graph.get_tensor_by_name('operator:0')
    today = datetime.datetime.today()
    print('Collecting Test Data for %s' %
          (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
    p_single_data = []
    p_next_data = []
    # `with` closes the news file (the original leaked the handle).
    with open('test_data/news/' + dt.strftime('%Y-%m-%d') + '.csv') as t_news_file:
        for row in csv.reader(t_news_file):
            p_stockdata = gendata.getStockData_Test(row[0], dt)
            if p_stockdata == -1:  # no quote for this symbol/date
                continue
            print("Origin String: %s" % row[1])
            filtered_string = gendata.filterString(row[1])
            # Drop non-ASCII bytes before hitting the sentiment API.
            filtered_string = filtered_string.decode("ascii", errors="ignore").encode()
            print("Filtered String: %s" % filtered_string)
            # Progressive fallback: filtered text, then enchant-checked
            # text, then just the symbol.
            try:
                sentdata = sentiment.analyzeText(filtered_string)
            except Exception:  # was a bare except:
                enchant_string = gendata.filterByEnchant(filtered_string)
                print("Enchanted String: %s" % enchant_string)
                try:
                    sentdata = sentiment.analyzeText(enchant_string)
                except Exception:
                    sentdata = sentiment.analyzeText(row[0])
            data = []
            data.extend((row[0], dt.timetuple().tm_yday))
            data.extend((sentdata.score, sentdata.magnitude))
            data.extend((p_stockdata[5], p_stockdata[1], p_stockdata[0],
                         p_stockdata[3], p_stockdata[4], p_stockdata[6]))
            p_single_data.append(data)
            if dt < today:
                # Also collect the actual next-day row for comparison.
                dtnext = dt + datetime.timedelta(days=1)
                p_stocknextday = gendata.getStockData_Test(row[0], dtnext)
                # BUG FIX: the original tested `is not -1` (identity -- it
                # only worked via CPython small-int caching); use !=.
                if p_stocknextday != -1:
                    datanext = []
                    datanext.extend((row[0], dtnext.timetuple().tm_yday))
                    datanext.extend((sentdata.score, sentdata.magnitude))
                    datanext.extend((p_stocknextday[5], p_stocknextday[1],
                                     p_stocknextday[0], p_stocknextday[3],
                                     p_stocknextday[4], p_stocknextday[6]))
                    p_next_data.append(datanext)
    if len(p_single_data) > 0:
        p_data = np.array(p_single_data)
        # print('Test Data collected for %s data:' % (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d') + ', '.join(
        #     str(c) for c in p_data))
        print('Test Data collected for %s data:' %
              (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
        p_origin = p_data
        p_data = preprocessData(p_data)
        params = p_data.shape[1] - 1
        p_x = p_data[:, 0:params]
        p_y = p_data[:, params].reshape(-1, 1)
        p_opening_price = p_x[:, params - 1].reshape(-1, 1)
        shape = p_x.shape
        print(shape)
        p_test = session.run(y, feed_dict={
            stock_data: p_x,
            opening_price: p_opening_price,
            stock_price: p_y
        })
        # Undo the per-row normalization: scale each prediction back up by
        # the norm of its raw feature slice (columns 2..5).
        origin_test = p_origin[:, 2:6]
        for i in range(origin_test.shape[0]):
            norm = np.linalg.norm(origin_test[i, :], axis=0)
            p_test[i, 0] = p_test[i, 0] * norm
        result_date = dt + datetime.timedelta(days=1)
        print('Predict Data for %s :' % result_date.strftime('%Y-%m-%d') +
              ', '.join('Stock %s :%s' % (d[0], str(c))
                        for d, c in zip(p_single_data, p_test)))
        # The original if/else assigned p_single_data on BOTH branches;
        # collapsed to a single assignment with identical behavior.
        p_compare_data = p_single_data
        s_name = []
        s_close = []
        s_adjclose = []
        s_low = []
        s_high = []
        s_volume = []
        s_open = []
        s_next_close = []
        for i in range(len(p_compare_data)):
            s_name.append(p_compare_data[i][0])
            s_close.append(p_compare_data[i][5])
            s_adjclose.append(p_compare_data[i][6])
            s_low.append(p_compare_data[i][8])
            s_high.append(p_compare_data[i][7])
            s_volume.append(p_compare_data[i][9])
            s_open.append(p_compare_data[i][4])
            if dt < today:
                # NOTE(review): p_next_data can be shorter than
                # p_compare_data (rows whose next-day quote was -1 were
                # skipped), so this index can drift or raise IndexError.
                s_next_close.append(p_next_data[i][5])
        if dt == today:
            # NOTE(review): dt == datetime.today() compares against a full
            # timestamp and is almost never True -- confirm intent.
            p_result = pd.DataFrame({
                'symbol': s_name,
                'prediction': p_test[:, 0],
                'previous open': s_open,
                'previous close': s_close,
                'previous high': s_high,
                'previous low': s_low,
                'previous volume': s_volume
            })
        else:
            p_result = pd.DataFrame({
                'symbol': s_name,
                'prediction': p_test[:, 0],
                'open': s_open,
                'close': s_close,
                'next close': s_next_close,
                # 'prev close': s_adjclose,
                'high': s_high,
                'low': s_low,
                'volume': s_volume
            })
        csv_name = 'test_data/line-result/result-%s.csv' % (result_date.date())
        # file_result = open('data/result/' + csv_name, 'rb+')
        p_result.to_csv(csv_name)
    # Release the TF session (the original left it open).
    session.close()
def learn_test(data):
    """Train a small feed-forward net on `data` and evaluate it.

    Splits the preprocessed matrix 70/30, trains with Adam on a relative
    (price-change) squared error until the cost plateaus, prints test-set
    error statistics, then runs predictions for a fixed set of March-2018
    dates and writes per-date result CSVs to test_data/result/.

    Depends on module-level names: preprocessData, normalizeInput,
    cross_validation, tf, slim, gendata, sentiment, np, pd.
    NOTE(review): sklearn's `cross_validation` module is long deprecated
    (now `model_selection`) -- confirm the pinned sklearn version.
    """
    data = preprocessData(data)
    # Last column is the target (next price); the rest are features.
    num_params = data.shape[1] - 1
    X = data[:, 0:num_params]
    Y = data[:, num_params].reshape(-1, 1)
    # Split the data into training and testing sets (70/30)
    train_X, test_X, train_Y, test_Y = cross_validation.train_test_split(
        X, Y, test_size=0.30)
    # Get the initial stock prices for computing the relative cost
    # (opening price is assumed to be the last feature column -- confirm).
    train_opening_price = train_X[:, num_params - 1].reshape(-1, 1)
    test_opening_price = test_X[:, num_params - 1].reshape(-1, 1)
    stock_data = tf.placeholder(tf.float32, [None, num_params])
    opening_price = tf.placeholder(tf.float32, [None, 1])
    stock_price = tf.placeholder(tf.float32, [None, 1])
    # Number of neurons in the hidden layer
    n_hidden_1 = 3
    n_hidden_2 = 3
    weights = {'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))}
    biases = {'out': tf.Variable(tf.random_normal([1]))}
    # Implement dropout to reduce overfitting
    keep_prob_input = tf.placeholder(tf.float32)
    keep_prob_hidden = tf.placeholder(tf.float32)
    # Hidden layers
    input_dropout = tf.nn.dropout(stock_data, keep_prob_input)
    layer_1 = slim.fully_connected(input_dropout,
                                   n_hidden_1,
                                   biases_initializer=None,
                                   activation_fn=tf.nn.relu)
    layer_1_dropout = tf.nn.dropout(layer_1, keep_prob_hidden)
    # NOTE(review): layer_2 is fed from input_dropout, not layer_1_dropout,
    # so layer_1 is dead code and the net has one effective hidden layer.
    # Looks like a bug (probably meant layer_1_dropout and n_hidden_2) but
    # changing it would alter the trained model -- confirm before fixing.
    layer_2 = slim.fully_connected(input_dropout,
                                   n_hidden_1,
                                   biases_initializer=None,
                                   activation_fn=tf.nn.relu)
    layer_2_dropout = tf.nn.dropout(layer_2, keep_prob_hidden)
    output_layer = tf.add(tf.matmul(layer_2_dropout, weights['out']),
                          biases['out'])
    learning_rate = 1e-4
    # Relative error: (actual - predicted) / opening price, squared.
    cost_function = tf.reduce_mean(
        tf.pow(tf.div(tf.subtract(stock_price, output_layer), opening_price),
               2))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_function)
    last_cost = 0
    tolerance = 1e-6
    epochs = 1
    max_epochs = 1e6
    sess = tf.Session()
    with sess.as_default():
        init = tf.global_variables_initializer()
        sess.run(init)
        # Train until the cost change between 100-epoch checkpoints drops
        # below `tolerance` (or the epoch cap is hit).
        while True:
            sess.run(optimizer,
                     feed_dict={
                         stock_data: train_X,
                         opening_price: train_opening_price,
                         stock_price: train_Y,
                         keep_prob_input: 0.8,
                         keep_prob_hidden: 0.5
                     })
            if epochs % 100 == 0:
                # NOTE(review): cost is evaluated with dropout still active
                # (0.8/0.5), so the convergence signal is noisy -- confirm.
                cost = sess.run(cost_function,
                                feed_dict={
                                    stock_data: train_X,
                                    opening_price: train_opening_price,
                                    stock_price: train_Y,
                                    keep_prob_input: 0.8,
                                    keep_prob_hidden: 0.5
                                })
                print "Epoch: %d: Error: %f" % (epochs, cost)
                if abs(cost - last_cost) <= tolerance or epochs > max_epochs:
                    print "Converged."
                    break
                last_cost = cost
            epochs += 1
        # Evaluate on the held-out split with dropout disabled (keep=1.0).
        print "Test error: ", sess.run(cost_function,
                                       feed_dict={
                                           stock_data: test_X,
                                           opening_price: test_opening_price,
                                           stock_price: test_Y,
                                           keep_prob_input: 1.0,
                                           keep_prob_hidden: 1.0
                                       })
        test_results = sess.run(output_layer,
                                feed_dict={
                                    stock_data: test_X,
                                    stock_price: test_Y,
                                    keep_prob_input: 1.0,
                                    keep_prob_hidden: 1.0
                                })
        # Compare predicted vs actual relative price change per test row.
        avg_perc_error = 0
        max_perc_error = 0
        mei = 0  # index of the worst row; computed but never used below
        for i in range(len(test_Y)):
            actual_change = abs(test_Y[i][0] -
                                test_X[i][num_params - 1]) / test_X[i][num_params - 1]
            predicted_change = abs(test_results[i][0] -
                                   test_X[i][num_params - 1]
                                   ) / test_X[i][num_params - 1]
            delta = abs(actual_change - predicted_change)
            avg_perc_error = avg_perc_error + delta
            if delta > max_perc_error:
                max_perc_error = delta
                mei = i
        avg_perc_error = (avg_perc_error * 100) / len(test_Y)
        max_perc_error *= 100
        print "Maximum percentage error: %f\nAverage percentage error: %f\n" % (
            max_perc_error, avg_perc_error)
        # ---- Prediction runs for a fixed set of test dates ----
        test_dates = []
        #p_f = open(FNAME, 'r')
        # stocks = p_f.readLines()
        # 2018-03-01..04 and 2018-03-16..18 (Py2 octal-style literals).
        t_date = datetime.datetime(2018, 03, 01)
        for i in range(4):
            test_dates.append(t_date)
            t_date += datetime.timedelta(days=1)
        t_date = datetime.datetime(2018, 03, 16)
        for i in range(3):
            test_dates.append(t_date)
            t_date += datetime.timedelta(days=1)
        print('Prediction Test Dates:' +
              ', '.join(d.strftime('%Y-%m-%d') for d in test_dates))
        precit_data = []  # NOTE(review): never used (and typo'd) -- dead
        for dt in test_dates:
            # NOTE(review): handle is never closed (leaks one per date).
            t_news_file = open('test_data/news/' + dt.strftime('%Y-%m-%d') +
                               '.csv')
            print('Collecting Test Data for %s' %
                  (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
            p_single_data = []
            p_csv_file = csv.reader(t_news_file)
            for row in p_csv_file:
                p_stockdata = gendata.getStockData_Test(row[0], dt)
                if (p_stockdata == -1):  # no quote: skip this headline
                    continue
                sentdata = sentiment.analyzeText(row[1])
                data = []
                data.extend((row[0], dt.timetuple().tm_yday))
                data.extend((sentdata.score, sentdata.magnitude))
                # p_stockdata indices: 0 adj close, 1 close, 3 high, 4 low,
                # 5 open, 6 volume
                data.extend((p_stockdata[5], p_stockdata[1], p_stockdata[0],
                             p_stockdata[3], p_stockdata[4], p_stockdata[6]))
                p_single_data.append(data)
            if len(p_single_data) > 0:
                p_data = np.array(p_single_data)
                print(
                    'Test Data collected for %s data:' %
                    (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d') +
                    ', '.join(str(c) for c in p_data))
                p_origin = p_data
                # NOTE(review): training used preprocessData but prediction
                # uses normalizeInput -- confirm they apply the same scaling.
                p_data = normalizeInput(p_data)
                params = p_data.shape[1] - 1
                p_x = p_data[:, 0:params]
                p_y = p_data[:, params].reshape(-1, 1)
                p_test = sess.run(output_layer,
                                  feed_dict={
                                      stock_data: p_x,
                                      stock_price: p_y,
                                      keep_prob_input: 1.0,
                                      keep_prob_hidden: 1.0
                                  })
                # Undo per-row normalization using the raw feature slice.
                orign_test = p_origin[:, 2:6]
                for i in range(orign_test.shape[0]):
                    norm = np.linalg.norm(orign_test[i, :], axis=0)
                    p_test[i, 0] = p_test[i, 0] * norm
                result_date = dt + datetime.timedelta(days=1)
                print(
                    'Predict Data for %s :' % result_date.strftime('%Y-%m-%d')
                    + ', '.join('Stock %s :%s' % (d[0], str(c))
                                for d, c in zip(p_single_data, p_test)))
                # Unpack per-stock columns for the result table.
                s_name = []
                s_close = []
                s_adjclose = []
                s_low = []
                s_high = []
                s_volume = []
                s_open = []
                for i in range(len(p_single_data)):
                    s_name.append(p_single_data[i][0])
                    s_close.append(p_single_data[i][5])
                    s_adjclose.append(p_single_data[i][6])
                    s_low.append(p_single_data[i][8])
                    s_high.append(p_single_data[i][7])
                    s_volume.append(p_single_data[i][9])
                    s_open.append(p_single_data[i][4])
                p_result = pd.DataFrame({
                    'symbol': s_name,
                    'prediction': p_test[:, 0],
                    'open': s_open,
                    'close': s_close,
                    'prev close': s_adjclose,
                    'high': s_high,
                    'low': s_low,
                    'volume': s_volume
                })
                csv_name = 'test_data/result/result-%s.csv' % result_date.date()
                #file_result = open('data/result/' + csv_name, 'rb+')
                p_result.to_csv(csv_name)
def test_prediction(type):
    """Run the saved model `model/model_<type>.ckpt` over fixed test dates.

    For each date (2018-03-16..18), builds one feature row per headline in
    test_data/news/<date>.csv, feeds it through the restored graph, scales
    each prediction back by the norm of its raw feature slice, and writes
    test_data/result/result-<date+1>_model_<type>.csv.

    `type` selects the model variant: 0 uses feature columns 2..5 and one
    target column; any other value uses columns 2..6 and two trailing
    columns.  NOTE(review): `type` shadows the builtin -- rename when the
    call sites can be checked.
    """
    test_dates = []
    # t_date = datetime.datetime(2018, 03, 01)
    # for i in range(4):
    #     test_dates.append(t_date)
    #     t_date += datetime.timedelta(days=1)
    # 2018-03-16..18 (Py2 octal-style literals).
    t_date = datetime.datetime(2018, 03, 16)
    for i in range(3):
        test_dates.append(t_date)
        t_date += datetime.timedelta(days=1)
    with tf.Session() as session:
        # NOTE(review): running the initializer before restore() is
        # pointless -- restore overwrites every variable anyway.
        init = tf.global_variables_initializer()
        session.run(init)
        model_path = os.getcwd() + '/model/model_' + str(type) + '.ckpt'
        meta_path = os.getcwd() + '/model/model_' + str(type) + '.ckpt.meta'
        saver = tf.train.import_meta_graph(meta_path)
        saver.restore(session, model_path)
        # p_test = sess.run(output_layer, feed_dict={stock_data: p_x, stock_price: p_y, keep_prob_input: 1.0,
        # keep_prob_hidden: 1.0})
        graph = tf.get_default_graph()
        # Placeholders / output tensor recovered from the restored graph.
        stock_data = graph.get_tensor_by_name('stock_data:0')
        opening_price = graph.get_tensor_by_name('opening_price:0')
        stock_price = graph.get_tensor_by_name('stock_price:0')
        keep_prob_input = graph.get_tensor_by_name('keep_prob_input:0')
        keep_prob_hidden = graph.get_tensor_by_name('keep_prob_hidden:0')
        output_layer = graph.get_tensor_by_name('output_layer:0')
        print('Prediction Test Dates:' +
              ', '.join(d.strftime('%Y-%m-%d') for d in test_dates))
        precit_data = []  # NOTE(review): never used (and typo'd) -- dead
        for dt in test_dates:
            # NOTE(review): handle is never closed (leaks one per date).
            t_news_file = open('test_data/news/' + dt.strftime('%Y-%m-%d') +
                               '.csv')
            print('Collecting Test Data for %s' %
                  (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
            p_single_data = []
            p_csv_file = csv.reader(t_news_file)
            for row in p_csv_file:
                p_stockdata = gendata.getStockData_Test(row[0], dt)
                if (p_stockdata == -1):  # no quote: skip this headline
                    continue
                sentdata = sentiment.analyzeText(row[1])
                data = []
                data.extend((row[0], dt.timetuple().tm_yday))
                data.extend((sentdata.score, sentdata.magnitude))
                # p_stockdata indices: 0 adj close, 1 close, 3 high, 4 low,
                # 5 open, 6 volume
                data.extend((p_stockdata[5], p_stockdata[1], p_stockdata[0],
                             p_stockdata[3], p_stockdata[4], p_stockdata[6]))
                p_single_data.append(data)
            if len(p_single_data) > 0:
                p_data = np.array(p_single_data)
                print(
                    'Test Data collected for %s data:' %
                    (dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d') +
                    ', '.join(str(c) for c in p_data))
                p_origin = p_data
                p_data = normalizeInput(p_data, type)
                # Model type 0 has one trailing target column; other types
                # have two.
                if type == 0:
                    params = p_data.shape[1] - 1
                else:
                    params = p_data.shape[1] - 2
                p_x = p_data[:, 0:params]
                p_y = p_data[:, params].reshape(-1, 1)
                p_test = session.run(output_layer,
                                     feed_dict={
                                         stock_data: p_x,
                                         stock_price: p_y,
                                         keep_prob_input: 1.0,
                                         keep_prob_hidden: 1.0
                                     })
                # Undo per-row normalization: column range of the raw
                # feature slice depends on the model type.
                origin_test = None
                if type == 0:
                    origin_test = p_origin[:, 2:6]
                else:
                    origin_test = p_origin[:, 2:7]
                for i in range(origin_test.shape[0]):
                    norm = np.linalg.norm(origin_test[i, :], axis=0)
                    p_test[i, 0] = p_test[i, 0] * norm
                result_date = dt + datetime.timedelta(days=1)
                print(
                    'Predict Data for %s :' % result_date.strftime('%Y-%m-%d')
                    + ', '.join('Stock %s :%s' % (d[0], str(c))
                                for d, c in zip(p_single_data, p_test)))
                # Unpack per-stock columns for the result table.
                s_name = []
                s_close = []
                s_adjclose = []
                s_low = []
                s_high = []
                s_volume = []
                s_open = []
                for i in range(len(p_single_data)):
                    s_name.append(p_single_data[i][0])
                    s_close.append(p_single_data[i][5])
                    s_adjclose.append(p_single_data[i][6])
                    s_low.append(p_single_data[i][8])
                    s_high.append(p_single_data[i][7])
                    s_volume.append(p_single_data[i][9])
                    s_open.append(p_single_data[i][4])
                p_result = pd.DataFrame({
                    'symbol': s_name,
                    'prediction': p_test[:, 0],
                    'open': s_open,
                    'close': s_close,
                    # 'prev close': s_adjclose,
                    'high': s_high,
                    'low': s_low,
                    'volume': s_volume
                })
                csv_name = 'test_data/result/result-%s_model_%d.csv' % (
                    result_date.date(), type)
                # file_result = open('data/result/' + csv_name, 'rb+')
                p_result.to_csv(csv_name)