import csv

from DataAccess.CassandraConn import session


def read_csv_to_cassandra(csv_path):
    # Naive row-by-row loader: one INSERT round trip per CSV row.
    # Column layout inferred from the INSERT: date, symbol, open, close, low, high, volume.
    with open(csv_path, "r", newline="") as f_obj:
        reader = csv.reader(f_obj)
        for i, row in enumerate(reader):
            if i > 0:  # skip the header row
                try:
                    cql = "INSERT INTO historical_spots (symbol, date_eod, open, close," \
                          " low, high, volume) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                    session.execute(
                        cql, (row[1], row[0], float(row[2]), float(row[3]),
                              float(row[4]), float(row[5]), float(row[6])))
                    print("Successfully inserted row: {}".format(i))
                except Exception as e:
                    print("Row {} failed to insert because of the following exception: {}".format(i, e))
                    print(" ".join(row))
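# Usage sketch (the file name is hypothetical; a header row and the column
# order date,symbol,open,close,low,high,volume are assumed from the INSERT):
#
#     read_csv_to_cassandra("historical_spots.csv")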
from collections import OrderedDict


def fetch_close_spots(symbol):
    # Prepared statements are parsed once by Cassandra and can be re-bound
    # with different values on later calls.
    prepared_stmt = session.prepare(
        "select date_eod, close from historical_spots where symbol = ?")
    bound_stmt = prepared_stmt.bind([symbol])
    result = OrderedDict()
    db_results = session.execute(bound_stmt)
    for row in db_results:
        # Rows come back in clustering order, so the OrderedDict preserves
        # the partition's date ordering.
        result[str(row.date_eod)] = row.close
    return result
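# Usage sketch, assuming historical_spots already holds data for the symbol
# ("AAPL" is illustrative only):
#
#     close_by_date = fetch_close_spots("AAPL")
#     for date_eod, close in close_by_date.items():
#         print(date_eod, close)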
import json
import uuid

import requests


def grab_intraday_spots(URL, symbol_list, interval, output_size, api_key):
    for symbol in symbol_list:
        try:
            # Build a fresh query URL per symbol; mutating URL itself would keep
            # appending each previous symbol's parameters on every iteration.
            query_url = URL + "&symbol=" + symbol + "&interval=" + interval + \
                "&outputsize=" + output_size + "&apikey=" + api_key
            response = requests.get(query_url)
            raw_data = json.loads(response.text)['Time Series (' + interval + ')']
            # Keys are ISO timestamps, so the lexicographically largest key is
            # the most recent bar.
            data = raw_data[sorted(raw_data)[-1]]
            cql = "INSERT INTO intraday_spots (symbol, time_uuid, open, close, low, high, volume) VALUES" \
                  " (%s, %s, %s, %s, %s, %s, %s) USING TTL 86400;"
            session.execute(cql, (symbol, uuid.uuid1(), float(data["1. open"]),
                                  float(data["4. close"]), float(data["3. low"]),
                                  float(data["2. high"]), float(data["5. volume"])))
        except Exception as e:
            print("Exception during API call: {}".format(e))
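# Because the INSERT uses "USING TTL 86400", every intraday row expires one day
# after it is written. The remaining lifetime can be inspected from cqlsh with
# standard CQL ('AAPL' is illustrative only):
#
#     SELECT symbol, TTL(close) FROM intraday_spots WHERE symbol = 'AAPL';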
import datetime


def read_csv_to_cassandra(csv_path):
    # Batched loader: rows in the CSV are grouped by symbol, and symbol is the
    # partition key, so each batch targets a single partition.
    with open(csv_path, "r", newline="") as f_obj:
        reader = csv.reader(f_obj)
        batch, insert_statement = get_new_batch()
        symbol = ''
        for i, row in enumerate(reader):
            try:
                if i > 0:  # skip the header row
                    if symbol != row[1]:
                        # Symbol changed: flush the previous partition's batch
                        # and start a new one.
                        if i > 1:
                            execute_batch(batch)
                        symbol = row[1]
                        batch, insert_statement = get_new_batch()
                    batch.add(
                        insert_statement,
                        (row[1],
                         datetime.datetime.strptime(row[0], "%Y-%m-%d"),
                         float(row[2]), float(row[3]), float(row[4]),
                         float(row[5]), float(row[6])))
            except Exception as e:
                print("Row {} failed to insert because of the following exception: {}".format(i, e))
                print(" ".join(row))
        # Flush the final symbol's batch.
        session.execute(batch)
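# get_new_batch is referenced above but not shown in this section. A minimal
# sketch of what it is assumed to do: hand back a fresh BatchStatement together
# with a prepared INSERT whose column list mirrors the row-by-row loader
# (preparing once at module scope would also work, since the driver caches
# prepared statements):

from cassandra.query import BatchStatement


def get_new_batch():
    batch = BatchStatement()
    insert_statement = session.prepare(
        "INSERT INTO historical_spots (symbol, date_eod, open, close,"
        " low, high, volume) VALUES (?, ?, ?, ?, ?, ?, ?)")
    return batch, insert_statement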
from cassandra import util


def get_all_intraday_data(self):
    results = []
    prepared_stmt = session.prepare(
        "select * from intraday_spots where symbol = ?")
    bound_stmt = prepared_stmt.bind([self.symbol])
    db_results = session.execute(bound_stmt)
    for row in db_results:
        results.append({
            # time_uuid is a version-1 UUID, so the write time can be
            # recovered from it with the driver's datetime_from_uuid1 helper.
            'timestamp': str(util.datetime_from_uuid1(row.time_uuid)),
            'open': row.open,
            'close': row.close,
            'low': row.low,
            'high': row.high,
            'volume': row.volume
        })
    return results
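# get_all_intraday_data is written as an instance method, so it is assumed to
# live on a class that carries the symbol, roughly (names hypothetical):
#
#     class Stock(object):
#         def __init__(self, symbol):
#             self.symbol = symbol
#         # ... get_all_intraday_data defined here ...
#
#     rows = Stock("MSFT").get_all_intraday_data()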
from DataAccess.CassandraConn import session
import uuid
import sched
import time
import requests
import json

scheduler = sched.scheduler(time.time, time.sleep)

# Refresh intraday data for every symbol already present in the historical table.
prepared_stmt = session.prepare("select distinct symbol from historical_spots")
row_list = session.execute(prepared_stmt)
symbol_list = [row.symbol for row in row_list]

arguments = ("https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY",
             symbol_list, "15min", "compact", "XJ3VBRI216I7CZ9C")


def schedule_grab_from_api(interval, arguments):
    # Simple polling loop: grab once, report, then sleep `interval` minutes.
    while True:
        grab_intraday_spots(*arguments)  # defined above
        print("Grab completed at: {}".format(time.time()))
        time.sleep(interval * 60)
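# Entry-point sketch: the 15-minute poll interval is an assumption, chosen to
# match the "15min" Alpha Vantage interval configured in `arguments` above.
if __name__ == "__main__":
    schedule_grab_from_api(15, arguments)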
def execute_batch(batch):
    session.execute(batch)