class QuoteFeeder(object):
    '''
    Base class for quote feeders that keep one quote file per symbol inside
    a local folder.

    Subclasses are expected to override download_quotes().
    '''

    logger = Logger.get_logger(__name__)

    def __init__(self, folder):
        '''
        Remember the target folder, creating it first when it is missing.
        '''
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                # Another process may have created the folder between the
                # existence check and makedirs(); only a real failure is
                # propagated.
                if not os.path.isdir(folder):
                    raise
        self.folder = folder

    def get_file_name_by_symbol(self, symbol):
        '''
        Return the path of the quote file for symbol inside self.folder.
        '''
        return self.folder + os.path.sep + symbol

    # NOTE(review): this class does not use abc.ABCMeta as its metaclass, so
    # the decorator below does not prevent instantiation without an override.
    @abc.abstractmethod
    def download_quotes(self, symbol, skip_existing):
        '''
        Download the quotes for symbol.

        fetch() treats the return value as the name of the saved file, and
        treats None as a failed download. This base implementation always
        returns None.
        '''
        return None

    def fetch(self, symbol, skip_existing=True):
        '''
        Make sure a quote file for symbol is available and return its name.

        When skip_existing is true and the file is already present, nothing
        is downloaded and the existing file name is returned. Otherwise the
        result of download_quotes() is returned, which is None on failure.
        '''
        QuoteFeeder.logger.debug("Fetching quotes for symbol %s ..." % symbol)
        file_name = self.get_file_name_by_symbol(symbol)

        if skip_existing and os.path.exists(file_name):
            QuoteFeeder.logger.info(
                "Quotes for %s already exists in %s" % (symbol, file_name))
            return file_name

        file_name = self.download_quotes(symbol, skip_existing)
        if file_name is not None:
            QuoteFeeder.logger.info(
                "Quotes for %s saved to file %s" % (symbol, file_name))
        else:
            QuoteFeeder.logger.warning(
                "Failed to fetch quotes for %s" % symbol)
        return file_name
class QuoteUpdater(object):
    '''
    Downloads end-of-day quotes from the ctxalgo.com HTTP API and stores
    them through a QuoteLoader.
    '''

    logger = Logger.get_logger(__name__)

    def __init__(self):
        '''
        Constructor
        '''
        self.quote_loader = QuoteLoader()

    def connect(self):
        '''
        Open the connection of the underlying quote loader.
        '''
        self.quote_loader.connect()

    def disconnect(self):
        '''
        Close the connection of the underlying quote loader.
        '''
        self.quote_loader.disconnect()

    def update_quotes(self, symbols, start_date):
        '''
        Fetch the quotes of the given symbols from start_date until today
        and insert every one of them through the quote loader.

        symbols is an iterable of symbol strings; start_date is placed in
        the request URL as-is. The response is read as a JSON object that
        maps each symbol to parallel arrays named "dates", "opens", "highs",
        "lows", "closes", "volumes" and "amounts".
        '''
        url = "/api/ohlc/%s?start-date=%s&end-date=%s" % (
            ",".join(symbols), start_date, time.strftime("%Y-%m-%d"))
        QuoteUpdater.logger.debug("Requesting %s" % url)

        conn = http.client.HTTPConnection("ctxalgo.com")
        try:
            conn.request("GET", url)
            response_body = conn.getresponse().read().decode("utf-8")
        finally:
            # Release the socket even when the request or the read fails.
            conn.close()

        loaded_json = json.loads(response_body)
        eod_quotes = []
        for symbol in loaded_json:
            arrays = loaded_json[symbol]
            # The arrays are indexed in lockstep (rather than zipped) so that
            # a response with arrays of different lengths still fails loudly.
            for index, date in enumerate(arrays["dates"]):
                eod_quote = CtxEodQuote(
                    symbol,
                    date,
                    arrays["opens"][index],
                    arrays["highs"][index],
                    arrays["lows"][index],
                    arrays["closes"][index],
                    arrays["volumes"][index],
                    arrays["amounts"][index])
                eod_quotes.append(eod_quote)
                self.quote_loader.insert_eod_quote(eod_quote)

        QuoteUpdater.logger.info(
            "Updated %d quotes for %d symbols between %s and today"
            % (len(eod_quotes), len(loaded_json), start_date))

    def update_all_quotes(self, start_date):
        '''
        Update the quotes of every stock returned by
        Symbols.fetch_all_ctx_stocks(), requesting 50 symbols at a time.
        '''
        stocks = Symbols.fetch_all_ctx_stocks()
        for offset in range(0, len(stocks), 50):
            # A real list (not a one-shot map object) is handed over so the
            # callee may iterate it more than once.
            symbols = [stock.symbol for stock in stocks[offset:offset + 50]]
            self.update_quotes(symbols, start_date)
class QuoteLoader(object):
    '''
    Writes end-of-day quotes into the eod_quotes table of the Cassandra
    keyspace configured on this class.
    '''

    host = "server.jingyusoft.com"
    keyspace = "stocks"

    logger = Logger.get_logger(__name__)

    # Class-level defaults so that disconnect() is safe on an instance that
    # was never connected.
    cluster = None
    session = None

    def connect(self):
        '''
        Connect to the cluster and open a session on the keyspace.
        '''
        self.cluster = Cluster(contact_points=[QuoteLoader.host])
        self.session = self.cluster.connect(QuoteLoader.keyspace)

    def disconnect(self):
        '''
        Shut the cluster connection down; does nothing when not connected.
        '''
        if self.cluster is not None:
            self.cluster.shutdown()

    def insert_eod_quote(self, eod_quote):
        '''
        Insert one quote into eod_quotes.

        Reads symbol, date (formatted with strftime), open, high, low, close
        and volume from eod_quote. Prices are rounded to two decimals and the
        volume is converted to an integer, as before.
        '''
        # Values are handed to the driver as parameters instead of being
        # pasted into the statement text, so a quote character inside the
        # symbol cannot alter the statement.
        cql = ("insert into eod_quotes "
               "(symbol, date, open, high, low, close, volume) "
               "values (%s, %s, %s, %s, %s, %s, %s)")
        parameters = (
            eod_quote.symbol,
            eod_quote.date.strftime("%Y-%m-%d"),
            float("%.2f" % eod_quote.open),
            float("%.2f" % eod_quote.high),
            float("%.2f" % eod_quote.low),
            float("%.2f" % eod_quote.close),
            int(eod_quote.volume))
        self.session.execute(cql, parameters)
class CassandraSession(object):
    '''
    Thin wrapper around a Cassandra cluster connection and a session on the
    keyspace configured on this class.
    '''

    host = "server.jingyusoft.com"
    keyspace = "stocks"

    logger = Logger.get_logger(__name__)

    # Class-level defaults so that disconnect() is safe when connect() was
    # never called or failed before the cluster was created.
    cluster = None
    session = None

    def __init__(self):
        '''
        Constructor
        '''

    def connect(self):
        '''
        Connect to the cluster and open a session on the keyspace.
        '''
        self.cluster = Cluster(contact_points=[CassandraSession.host])
        self.session = self.cluster.connect(CassandraSession.keyspace)

    def disconnect(self):
        '''
        Shut the cluster connection down; does nothing when not connected.
        '''
        # Without this guard a disconnect() inside a caller's finally block
        # would raise AttributeError and hide the original connect() error.
        if self.cluster is not None:
            self.cluster.shutdown()

    def execute(self, query, parameters=None, trace=False):
        '''
        Run query on the open session and return the driver's result.
        '''
        return self.session.execute(query, parameters=parameters, trace=trace)
class Symbols(object):
    '''
    Static helpers that list the stocks known to ctxalgo.com, look symbols
    up on Yahoo and Sina, and store symbol mappings in Cassandra.
    '''

    logger = Logger.get_logger(__name__)

    # The 'verbose' keyword was removed from namedtuple() in Python 3.7 and
    # raises TypeError there; its old default was False, so it is dropped.
    CtxStock = collections.namedtuple(
        "CtxStock", ["symbol", "name", "short_symbol"])

    # NOTE(review): http.client timeouts are expressed in seconds; the values
    # 5000 and 10000 used below look like milliseconds - confirm.

    def __init__(self):
        '''
        Constructor
        '''

    @staticmethod
    def _read_response(conn, path, encoding, size=None):
        '''
        Send a GET request for path over conn and return the body decoded
        with encoding (only the first size bytes when size is given).
        The connection is always closed afterwards.
        '''
        try:
            conn.request("GET", path)
            return conn.getresponse().read(size).decode(encoding)
        finally:
            conn.close()

    @staticmethod
    def fetch_all_ctx_stocks():
        '''
        Return a list of CtxStock tuples built from /api/stocks on
        ctxalgo.com, or an empty list when anything goes wrong.

        The short symbol is the symbol without its first two characters.
        '''
        try:
            Symbols.logger.info("Creating connection to ctxalgo.com ...")
            conn = http.client.HTTPConnection("ctxalgo.com", timeout=5000)
            Symbols.logger.info("Fetching stock list from /api/stocks ...")
            response = Symbols._read_response(conn, "/api/stocks", "utf-8")
            Symbols.logger.debug("Response size is %d bytes" % len(response))
            loaded_json = json.loads(response)
            return [Symbols.CtxStock(symbol, loaded_json[symbol], symbol[2:])
                    for symbol in loaded_json]
        except Exception as error:
            # Still best-effort, but no longer silent, and KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            Symbols.logger.warning(
                "Failed to fetch stock list from ctxalgo.com: %s" % error)
            return []

    @staticmethod
    def search(pattern):
        '''
        Search for pattern on Yahoo first and fall back to Sina.

        Returns a list of symbols (possibly empty).
        '''
        symbol = Symbols.search_from_yahoo(pattern)
        if symbol is not None:
            return [symbol]
        return Symbols.search_from_sina(pattern)

    @staticmethod
    def search_from_yahoo(pattern):
        '''
        Return the first symbol of the form NNNNNN.XX found in Yahoo's
        autocomplete response for pattern, or None when there is no match
        or the request fails.
        '''
        try:
            conn = http.client.HTTPConnection("d.yimg.com", timeout=5000)
            response = Symbols._read_response(
                conn,
                "/aq/autoc?query=%s&region=US&lang=en-US&callback=YAHOO.util.ScriptNodeDataSource.callbacks" % pattern,
                "utf-8")
            # The dot is escaped so that only a literal '.' separates the
            # six digits from the two-letter suffix.
            matched = re.search(
                r'(?<="symbol":")[0-9]{6}\.[A-Z]{2}', response)
            if matched is not None:
                return matched.group(0)
            return None
        except Exception:
            return None

    @staticmethod
    def get_yahoo_symbol_from_ctx_symbol(ctx_symbol):
        '''
        Return the first candidate Yahoo symbol for ctx_symbol whose
        table.csv download starts with "Date", or None when no candidate
        does or a request fails.
        '''
        short_symbol = ctx_symbol[2:]
        candidates = []
        if ctx_symbol.startswith("sh"):
            candidates.append(short_symbol + ".SH")
        elif ctx_symbol.startswith("sz"):
            candidates.append(short_symbol + ".SZ")
        # The ".SS" suffix is always tried last.
        candidates.append(short_symbol + ".SS")

        for candidate in candidates:
            try:
                conn = http.client.HTTPConnection("ichart.finance.yahoo.com")
                header = Symbols._read_response(
                    conn, "/table.csv?s=%s" % candidate, "utf-8", 4)
                if header == "Date":
                    return candidate
            except Exception:
                # A failure on any candidate aborts the whole lookup.
                return None
        return None

    @staticmethod
    def search_from_sina(pattern):
        '''
        Return the symbols found for pattern through Sina's suggestion
        service and company pages, or an empty list when any step fails.
        '''
        symbols = []
        try:
            conn = http.client.HTTPConnection(
                "suggest3.sinajs.cn", timeout=10000)
            response = Symbols._read_response(
                conn, "/suggest/type=11,12,13,14,15&key=%s" % pattern, "gbk")
            matched = re.search("\"(.*)\"", response)
            if matched is not None:
                for item in matched.group(1).split(";"):
                    # The fourth comma-separated field is used as the code
                    # of the company page.
                    code = item.split(",")[3]
                    conn = http.client.HTTPConnection("finance.sina.com.cn")
                    page = Symbols._read_response(
                        conn, "/realstock/company/%s/nc.shtml" % code, "gbk")
                    search_pattern = "<span>(" + pattern + "."
                    start_index = page.index(search_pattern) + len("<span>(")
                    # pattern, the dot and the two characters after it.
                    symbol = page[start_index:start_index + len(pattern) + 3]
                    symbols.append(symbol)
            return symbols
        except Exception:
            return []

    @staticmethod
    def insert_symbol_mapping(cassandra_session, ctx_symbol, yahoo_symbol,
                              name, short_symbol):
        '''
        Insert one row into the symbols table through cassandra_session,
        stamping it with the server-side current time.
        '''
        cassandra_session.execute(
            "insert into symbols (ctx_symbol, yahoo_symbol, name, short_symbol, update_timestamp) values (%s, %s, %s, %s, dateof(now()))",
            (ctx_symbol, yahoo_symbol, name, short_symbol))
@author: Univer ''' import sys, os, inspect current_file = inspect.getfile(inspect.currentframe()) parent_dir = os.path.join(os.path.split(current_file)[0], os.path.pardir) parent_folder = os.path.realpath(os.path.abspath(parent_dir)) if parent_folder not in sys.path: sys.path.insert(0, parent_folder) if __name__ == '__main__': from symbols.symbols import Symbols from common.cassandra import CassandraSession from common.logging import Logger logger = Logger.get_logger(__name__) result = Symbols.fetch_all_ctx_stocks() cassandra_session = CassandraSession() try: cassandra_session.connect() for ctx_stock in result: yahoo_symbol = Symbols.get_yahoo_symbol_from_ctx_symbol(ctx_stock.symbol) Symbols.insert_symbol_mapping(cassandra_session, ctx_stock.symbol, yahoo_symbol, ctx_stock.name, ctx_stock.short_symbol) logger.info("%s -> %s %s" % (ctx_stock.symbol, yahoo_symbol, ctx_stock.name)) Symbols.logger.info("Updated %d symbols from CoreTX" % len(result)) finally: cassandra_session.disconnect()