def books_from_lines_v1(lines, debug=False, end=None, drop_out_of_order=False):
    """Build a list of order books from the lines of a V1-format log.

    A line starting with "ORDERBOOK" opens a new book; the text from
    column 10 onwards is handed to ``parse_datetime_opt`` to obtain the
    book's ``lastUpdateTime``.  Every other line is split on commas and
    turned into an ``Order`` (columns are looked up through ``obc``) which
    is added to the current book as a bid or an offer.

    A book is appended to the result only when the *next* "ORDERBOOK"
    line is seen, so the book that is still open when the input ends is
    not part of the returned list.
    NOTE(review): this mirrors the original behaviour; confirm that
    dropping the trailing book is intended.

    Args:
        lines: iterable of text lines.
        debug: accepted for interface compatibility; not used here.
        end: if truthy, stop reading once more than ``end`` book headers
            have been seen.
        drop_out_of_order: if true, a finished book is kept only when its
            ``lastUpdateTime`` equals the largest timestamp seen so far.

    Returns:
        list of completed ``OB`` instances, in input order.
    """
    keep_out_of_order = not drop_out_of_order
    currBook = None
    maxTimestamp = None
    nBooks = 0
    book_list = []

    for line in lines:
        if end and nBooks > end:
            break

        if line.startswith("ORDERBOOK"):
            nBooks += 1
            # flush the book that was being built before starting a new one
            if currBook is not None:
                if keep_out_of_order or currBook.lastUpdateTime == maxTimestamp:
                    book_list.append(currBook)
            lastUpdateTime = parse_datetime_opt(line[10:])
            if maxTimestamp is None or lastUpdateTime > maxTimestamp:
                maxTimestamp = lastUpdateTime
            currBook = OB(lastUpdateTime=lastUpdateTime)
        else:
            row = line.split(',')
            side = row[obc.SIDE]
            entry = Order(
                timestamp=parse_datetime_opt(row[obc.TIMESTAMP]),
                side=side,
                level=int(row[obc.LEVEL]),
                price=float(row[obc.PRICE]),
                # int() promotes to long automatically on Python 2 and is
                # the only integer type on Python 3
                size=int(row[obc.SIZE]),
            )
            if side == obc.BID:
                currBook.add_bid(entry)
            elif side == obc.OFFER:
                currBook.add_offer(entry)

    return book_list
def start_new_orderbook(self, line):
    """Close the current order book (if any) and open a new one from *line*.

    *line* is split on commas into exactly four fields; the third and
    fourth are ``<seconds>:<nanoseconds>`` pairs (going by the variable
    names) holding the monotonic and the exchange timestamp.  Both are
    converted to whole milliseconds; the exchange timestamp is further
    split into a day number and milliseconds within that day using
    ``self.SECONDS_PER_DAY``.

    Side effects: may clear ``self.order_cache``, appends the previous
    book to ``self.books``, replaces ``self.currBook`` and hands the
    accumulated ``self.actions`` to the new book before resetting them.

    Raises:
        ValueError: if *line* does not have four comma-separated fields
            or a timestamp part is not an integer.
    """
    self.at_start_of_file = False
    # periodically clear the order cache so it doesn't eat all the memory
    if len(self.order_cache) > 5000:
        self.order_cache.clear()
    # NOTE(review): truthiness test kept as in the original; if OB defines
    # __len__/__nonzero__ an "empty" book would be skipped - confirm.
    if self.currBook:
        self.books.append(self.currBook)

    _, _, monotonic_time, exchange_time = line.split(',')
    monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(':')
    epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

    # int() auto-promotes to long on Python 2; '//' keeps the arithmetic
    # integral even when true division is in effect.
    epoch_seconds = int(epoch_seconds)
    exchange_seconds = epoch_seconds % self.SECONDS_PER_DAY
    exchange_day = epoch_seconds // self.SECONDS_PER_DAY
    update_time = exchange_seconds * 1000 + int(exchange_nanoseconds) // 10**6
    monotonic_time = (int(monotonic_seconds) * 1000
                      + int(monotonic_nanoseconds) // 10**6)

    self.currBook = OB(
        day=exchange_day,
        lastUpdateTime=update_time,
        lastUpdateMonotonic=monotonic_time,
        actions=self.actions,
    )
    # the new book owns the list above; start a fresh one for the next book
    self.actions = []
def start_new_orderbook(self, line):
    """Close the current order book (if any) and open a new one from *line*.

    *line* is split on commas into exactly four fields; the third and
    fourth are ``<seconds>:<nanoseconds>`` pairs (going by the variable
    names) holding the monotonic and the exchange timestamp.  Both are
    converted to whole milliseconds; the exchange timestamp is further
    split into a day number and milliseconds within that day using
    ``self.SECONDS_PER_DAY``.

    Side effects: may clear ``self.order_cache``, appends the previous
    book to ``self.books``, replaces ``self.currBook`` and hands the
    accumulated ``self.actions`` to the new book before resetting them.

    Raises:
        ValueError: if *line* does not have four comma-separated fields
            or a timestamp part is not an integer.
    """
    self.at_start_of_file = False
    # periodically clear the order cache so it doesn't eat all the memory
    if len(self.order_cache) > 5000:
        self.order_cache.clear()
    # NOTE(review): truthiness test kept as in the original; if OB defines
    # __len__/__nonzero__ an "empty" book would be skipped - confirm.
    if self.currBook:
        self.books.append(self.currBook)

    _, _, monotonic_time, exchange_time = line.split(',')
    monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(':')
    epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

    # int() auto-promotes to long on Python 2; '//' keeps the arithmetic
    # integral even when true division is in effect.
    epoch_seconds = int(epoch_seconds)
    exchange_seconds = epoch_seconds % self.SECONDS_PER_DAY
    exchange_day = epoch_seconds // self.SECONDS_PER_DAY
    update_time = exchange_seconds * 1000 + int(exchange_nanoseconds) // 10**6
    monotonic_time = (int(monotonic_seconds) * 1000
                      + int(monotonic_nanoseconds) // 10**6)

    self.currBook = OB(
        day=exchange_day,
        lastUpdateTime=update_time,
        lastUpdateMonotonic=monotonic_time,
        actions=self.actions,
    )
    # the new book owns the list above; start a fresh one for the next book
    self.actions = []
def books_from_lines_v1(lines, debug=False, end=None, drop_out_of_order=False):
    """Build a list of order books from the lines of a V1-format log.

    A line starting with "ORDERBOOK" opens a new book; the text from
    column 10 onwards is handed to ``parse_datetime_opt`` to obtain the
    book's ``lastUpdateTime``.  Every other line is split on commas and
    turned into an ``Order`` (columns are looked up through ``obc``) which
    is added to the current book as a bid or an offer.

    A book is appended to the result only when the *next* "ORDERBOOK"
    line is seen, so the book that is still open when the input ends is
    not part of the returned list.
    NOTE(review): this mirrors the original behaviour; confirm that
    dropping the trailing book is intended.

    Args:
        lines: iterable of text lines.
        debug: accepted for interface compatibility; not used here.
        end: if truthy, stop reading once more than ``end`` book headers
            have been seen.
        drop_out_of_order: if true, a finished book is kept only when its
            ``lastUpdateTime`` equals the largest timestamp seen so far.

    Returns:
        list of completed ``OB`` instances, in input order.
    """
    keep_out_of_order = not drop_out_of_order
    currBook = None
    maxTimestamp = None
    nBooks = 0
    book_list = []

    for line in lines:
        if end and nBooks > end:
            break

        if line.startswith("ORDERBOOK"):
            nBooks += 1
            # flush the book that was being built before starting a new one
            if currBook is not None:
                if keep_out_of_order or currBook.lastUpdateTime == maxTimestamp:
                    book_list.append(currBook)
            lastUpdateTime = parse_datetime_opt(line[10:])
            if maxTimestamp is None or lastUpdateTime > maxTimestamp:
                maxTimestamp = lastUpdateTime
            currBook = OB(lastUpdateTime=lastUpdateTime)
        else:
            row = line.split(',')
            side = row[obc.SIDE]
            entry = Order(
                timestamp=parse_datetime_opt(row[obc.TIMESTAMP]),
                side=side,
                level=int(row[obc.LEVEL]),
                price=float(row[obc.PRICE]),
                # int() promotes to long automatically on Python 2 and is
                # the only integer type on Python 3
                size=int(row[obc.SIZE]),
            )
            if side == obc.BID:
                currBook.add_bid(entry)
            elif side == obc.OFFER:
                currBook.add_offer(entry)

    return book_list
class V3_Parser:
    """Line-oriented parser for 'V3' order-book log files.

    Lines are dispatched on their first character:
    'V' header, 'A'/'D'/'M' add/delete/modify actions, 'O' start of a new
    order book, 'Q' an order entry for the current book, '#' and blank
    lines are ignored.  Completed books accumulate in ``self.books``.
    """

    def __init__(self):
        self.header = {}
        # sometimes the files are missing their first line,
        # in which case we need to reconstruct the header info
        self.done_with_header = False
        self.currBook = None
        self.books = []
        # keep this around for printing debug info on restarts
        self.actions = []
        self.SECONDS_PER_DAY = 60 * 60 * 24
        # raw 'Q' line -> Order, so identical lines share one Order object
        self.order_cache = {}
        # action tuple -> the first equal tuple seen (interning)
        self.action_cache = {}

    def header_ok(self, f):
        """Return True if *f* looks like a parseable V3 file.

        The file either starts with a three-field 'V3' header line or with
        a 'RESTART' line that is eventually followed by a line starting
        with 'OB'.  Leading blank lines are skipped.  The file position is
        always restored before returning, so the call leaves no externally
        visible changes.
        """
        old_pos = f.tell()
        try:
            line = f.readline()
            # readline() returns '' only at EOF, so stop there instead of
            # spinning forever on an empty or all-blank file
            while line and not line.strip():
                line = f.readline()
            if line.startswith('V3'):
                fields = line.split(',')
                return len(fields) == 3 and fields[0] == 'V3'
            if line.startswith('RESTART'):
                while line and not line.startswith('OB'):
                    line = f.readline()
                # found an order book, so file is probably OK
                return bool(line)
            return False
        finally:
            f.seek(old_pos)

    def parse_header(self, line):
        """Parse the 'V3,<ccy>,<...>' header line into ``self.header``.

        Raises:
            AssertionError: if anything was parsed before the header or
                the line is not a three-field V3 header.
        """
        # make sure nothing else has been parsed yet
        assert self.currBook is None
        assert len(self.books) == 0
        assert self.actions == []
        assert len(self.header) == 0
        fields = line.split(',')
        assert len(fields) == 3
        v, ccy_str, _ = fields
        assert v == 'V3'
        self.header['ccy'] = parse_ccy(ccy_str)
        self.done_with_header = True

    def _record_action(self, action_type, line):
        """Parse a five-field action line and append it to ``self.actions``.

        The stored action is the plain tuple
        ``(action_type, side, price, volume)``; equal tuples are shared
        through ``self.action_cache``.
        """
        _, side, volume, price, _ = line.split(',')
        side = OFFER_SIDE if side == '1' else BID_SIDE
        price = parse_float(price)
        # I think I saw trailing decimal points in some logs,
        # so parse as float just in case
        volume = parse_int_with_decimal_point(volume)
        action = (action_type, side, price, volume)
        # reuse the first equal tuple so duplicates do not cost extra memory
        action = self.action_cache.setdefault(action, action)
        self.actions.append(action)

    def parse_add_action(self, line):
        """Record an 'A' (add) action line."""
        self._record_action(ADD_ACTION_TYPE, line)

    def parse_delete_action(self, line):
        """Record a 'D' (delete) action line."""
        self._record_action(DELETE_ACTION_TYPE, line)

    def parse_modify_action(self, line):
        """Record an 'M' (modify) action line."""
        self._record_action(MODIFY_ACTION_TYPE, line)

    def start_new_orderbook(self, line):
        """Close the current order book (if any) and open a new one.

        *line* is split on commas into exactly four fields; the third and
        fourth are ``<seconds>:<nanoseconds>`` pairs (going by the variable
        names) holding the monotonic and the exchange timestamp.  Both are
        converted to whole milliseconds; the exchange timestamp is split
        into a day number and milliseconds within that day.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()
        # NOTE(review): truthiness test kept as in the original; if OB
        # defines __len__/__nonzero__ an "empty" book would be skipped.
        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = \
            monotonic_time.partition(':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # int() auto-promotes to long on Python 2; '//' keeps the
        # arithmetic integral even when true division is in effect.
        epoch_seconds = int(epoch_seconds)
        exchange_seconds = epoch_seconds % self.SECONDS_PER_DAY
        exchange_day = epoch_seconds // self.SECONDS_PER_DAY
        update_time = (exchange_seconds * 1000
                       + int(exchange_nanoseconds) // 10**6)
        monotonic_time = (int(monotonic_seconds) * 1000
                          + int(monotonic_nanoseconds) // 10**6)

        self.currBook = OB(
            day=exchange_day,
            lastUpdateTime=update_time,
            lastUpdateMonotonic=monotonic_time,
            actions=self.actions,
        )
        # the new book owns the list above; start a fresh one
        self.actions = []

    def unsupported(self, line):
        """Handler for line kinds that must never occur."""
        # explicit raise so the check survives ``python -O``
        raise AssertionError("unsupported line: %r" % (line,))

    def ignore(self, line):
        """Handler for lines that carry no data (comments, blank lines)."""
        pass

    def parse(self, f, debug=False, end=None, drop_out_of_order=False):
        """Parse the seekable text file *f* and return ``(header, books)``.

        Lines are read with ``readline()`` rather than by iterating *f*:
        the error handler below needs ``tell``/``read``/``seek``, and file
        iteration uses a read-ahead buffer that makes those calls
        unreliable.

        Args:
            f: file-like object supporting readline/tell/read/seek.
            debug, end, drop_out_of_order: accepted for interface
                compatibility; not used by this parser.

        Returns:
            tuple ``(self.header, self.books)``.  The book still being
            built when the input ends is not included.

        Raises:
            ValueError: a RESTART occurs before any header and no order
                book line follows it.
            Exception: any parse error that is neither at the very end of
                the file nor on a RESTART line is re-raised after being
                reported.
        """
        dispatch_table = {
            'V': self.parse_header,
            'A': self.parse_add_action,
            'D': self.parse_delete_action,
            'M': self.parse_modify_action,
            'O': self.start_new_orderbook,
            '#': self.ignore,
            '\n': self.ignore,
        }
        while True:
            line = f.readline()
            if not line:
                break
            try:
                # this loop used to only dispatch on the first char
                # but I inlined the most common function
                # 'build_orderbook_entry' for slight performance boost
                if line[0] == 'Q':
                    if line in self.order_cache:
                        order = self.order_cache[line]
                    else:
                        (_, monotonic_timestamp, side, level,
                         price, size, _) = line.split(',')
                        seconds, _, nanoseconds = \
                            monotonic_timestamp.partition(':')
                        order = Order(
                            timestamp=(int(seconds) * 1000
                                       + int(nanoseconds) // 1000000),
                            side=(side == '1'),
                            level=int(level),
                            price=float(price),
                            size=int(size),
                        )
                        self.order_cache[line] = order
                    self.currBook.add_order(order)
                else:
                    dispatch_table[line[0]](line)
            except Exception as inst:
                # sometimes the collector doesn't finish printing
                # the last orderbook
                # so skip exceptions at the end of a file
                curr_pos = f.tell()
                peek_str = f.read(100)
                f.seek(curr_pos)
                if not peek_str and len(self.books) > 0:
                    print("At last line of data file, ignoring error...")
                    break
                elif "RESTART" in line:
                    print("Found restart without preceding newline")
                    print(">>  %s" % (line,))
                    # had to inline restarts since, if they happen at the
                    # beginning of the file, they force us to wind forward
                    # to the first orderbook
                    if self.done_with_header:
                        continue
                    assert self.currBook is None
                    assert len(self.books) == 0
                    assert len(self.header) == 0
                    # if we haven't parsed a header yet then skip to first
                    # orderbook we can find
                    line = f.readline()
                    while line and not line.startswith('OB'):
                        line = f.readline()
                    if not line:
                        raise ValueError(
                            "RESTART before header but no order book follows")
                    fields = line.split(',')
                    assert len(fields) >= 2
                    ccy_str = fields[1]
                    self.header['ccy'] = parse_ccy(ccy_str)
                    # call the normal routine to start an orderbook
                    self.start_new_orderbook(line)
                    self.done_with_header = True
                    continue
                else:
                    print("Encountered error at line: %s" % (line,))
                    print(type(inst))  # the exception instance
                    print(inst)  # __str__ allows args to printed directly
                    print("Unrecoverable!")
                    raise
        return self.header, self.books
class V3_Parser:
    """Line-oriented parser for 'V3' order-book log files.

    Lines are dispatched on their first character:
    'V' header, 'A'/'D'/'M' add/delete/modify actions, 'O' start of a new
    order book, 'Q' an order entry for the current book, '#' and blank
    lines are ignored.  Completed books accumulate in ``self.books``.
    """

    def __init__(self):
        self.header = {}
        # sometimes the files are missing their first line,
        # in which case we need to reconstruct the header info
        self.done_with_header = False
        self.currBook = None
        self.books = []
        # keep this around for printing debug info on restarts
        self.actions = []
        self.SECONDS_PER_DAY = 60 * 60 * 24
        # raw 'Q' line -> Order, so identical lines share one Order object
        self.order_cache = {}
        # action tuple -> the first equal tuple seen (interning)
        self.action_cache = {}

    def header_ok(self, f):
        """Return True if *f* looks like a parseable V3 file.

        The file either starts with a three-field 'V3' header line or with
        a 'RESTART' line that is eventually followed by a line starting
        with 'OB'.  Leading blank lines are skipped.  The file position is
        always restored before returning, so the call leaves no externally
        visible changes.
        """
        old_pos = f.tell()
        try:
            line = f.readline()
            # readline() returns '' only at EOF, so stop there instead of
            # spinning forever on an empty or all-blank file
            while line and not line.strip():
                line = f.readline()
            if line.startswith('V3'):
                fields = line.split(',')
                return len(fields) == 3 and fields[0] == 'V3'
            if line.startswith('RESTART'):
                while line and not line.startswith('OB'):
                    line = f.readline()
                # found an order book, so file is probably OK
                return bool(line)
            return False
        finally:
            f.seek(old_pos)

    def parse_header(self, line):
        """Parse the 'V3,<ccy>,<...>' header line into ``self.header``.

        Raises:
            AssertionError: if anything was parsed before the header or
                the line is not a three-field V3 header.
        """
        # make sure nothing else has been parsed yet
        assert self.currBook is None
        assert len(self.books) == 0
        assert self.actions == []
        assert len(self.header) == 0
        fields = line.split(',')
        assert len(fields) == 3
        v, ccy_str, _ = fields
        assert v == 'V3'
        self.header['ccy'] = parse_ccy(ccy_str)
        self.done_with_header = True

    def _record_action(self, action_type, line):
        """Parse a five-field action line and append it to ``self.actions``.

        The stored action is the plain tuple
        ``(action_type, side, price, volume)``; equal tuples are shared
        through ``self.action_cache``.
        """
        _, side, volume, price, _ = line.split(',')
        side = OFFER_SIDE if side == '1' else BID_SIDE
        price = parse_float(price)
        # I think I saw trailing decimal points in some logs,
        # so parse as float just in case
        volume = parse_int_with_decimal_point(volume)
        action = (action_type, side, price, volume)
        # reuse the first equal tuple so duplicates do not cost extra memory
        action = self.action_cache.setdefault(action, action)
        self.actions.append(action)

    def parse_add_action(self, line):
        """Record an 'A' (add) action line."""
        self._record_action(ADD_ACTION_TYPE, line)

    def parse_delete_action(self, line):
        """Record a 'D' (delete) action line."""
        self._record_action(DELETE_ACTION_TYPE, line)

    def parse_modify_action(self, line):
        """Record an 'M' (modify) action line."""
        self._record_action(MODIFY_ACTION_TYPE, line)

    def start_new_orderbook(self, line):
        """Close the current order book (if any) and open a new one.

        *line* is split on commas into exactly four fields; the third and
        fourth are ``<seconds>:<nanoseconds>`` pairs (going by the variable
        names) holding the monotonic and the exchange timestamp.  Both are
        converted to whole milliseconds; the exchange timestamp is split
        into a day number and milliseconds within that day.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()
        # NOTE(review): truthiness test kept as in the original; if OB
        # defines __len__/__nonzero__ an "empty" book would be skipped.
        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = \
            monotonic_time.partition(':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # int() auto-promotes to long on Python 2; '//' keeps the
        # arithmetic integral even when true division is in effect.
        epoch_seconds = int(epoch_seconds)
        exchange_seconds = epoch_seconds % self.SECONDS_PER_DAY
        exchange_day = epoch_seconds // self.SECONDS_PER_DAY
        update_time = (exchange_seconds * 1000
                       + int(exchange_nanoseconds) // 10**6)
        monotonic_time = (int(monotonic_seconds) * 1000
                          + int(monotonic_nanoseconds) // 10**6)

        self.currBook = OB(
            day=exchange_day,
            lastUpdateTime=update_time,
            lastUpdateMonotonic=monotonic_time,
            actions=self.actions,
        )
        # the new book owns the list above; start a fresh one
        self.actions = []

    def unsupported(self, line):
        """Handler for line kinds that must never occur."""
        # explicit raise so the check survives ``python -O``
        raise AssertionError("unsupported line: %r" % (line,))

    def ignore(self, line):
        """Handler for lines that carry no data (comments, blank lines)."""
        pass

    def parse(self, f, debug=False, end=None, drop_out_of_order=False):
        """Parse the seekable text file *f* and return ``(header, books)``.

        Lines are read with ``readline()`` rather than by iterating *f*:
        the error handler below needs ``tell``/``read``/``seek``, and file
        iteration uses a read-ahead buffer that makes those calls
        unreliable.

        Args:
            f: file-like object supporting readline/tell/read/seek.
            debug, end, drop_out_of_order: accepted for interface
                compatibility; not used by this parser.

        Returns:
            tuple ``(self.header, self.books)``.  The book still being
            built when the input ends is not included.

        Raises:
            ValueError: a RESTART occurs before any header and no order
                book line follows it.
            Exception: any parse error that is neither at the very end of
                the file nor on a RESTART line is re-raised after being
                reported.
        """
        dispatch_table = {
            'V': self.parse_header,
            'A': self.parse_add_action,
            'D': self.parse_delete_action,
            'M': self.parse_modify_action,
            'O': self.start_new_orderbook,
            '#': self.ignore,
            '\n': self.ignore,
        }
        while True:
            line = f.readline()
            if not line:
                break
            try:
                # this loop used to only dispatch on the first char
                # but I inlined the most common function
                # 'build_orderbook_entry' for slight performance boost
                if line[0] == 'Q':
                    if line in self.order_cache:
                        order = self.order_cache[line]
                    else:
                        (_, monotonic_timestamp, side, level,
                         price, size, _) = line.split(',')
                        seconds, _, nanoseconds = \
                            monotonic_timestamp.partition(':')
                        order = Order(
                            timestamp=(int(seconds) * 1000
                                       + int(nanoseconds) // 1000000),
                            side=(side == '1'),
                            level=int(level),
                            price=float(price),
                            size=int(size),
                        )
                        self.order_cache[line] = order
                    self.currBook.add_order(order)
                else:
                    dispatch_table[line[0]](line)
            except Exception as inst:
                # sometimes the collector doesn't finish printing
                # the last orderbook
                # so skip exceptions at the end of a file
                curr_pos = f.tell()
                peek_str = f.read(100)
                f.seek(curr_pos)
                if not peek_str and len(self.books) > 0:
                    print("At last line of data file, ignoring error...")
                    break
                elif "RESTART" in line:
                    print("Found restart without preceding newline")
                    print(">>  %s" % (line,))
                    # had to inline restarts since, if they happen at the
                    # beginning of the file, they force us to wind forward
                    # to the first orderbook
                    if self.done_with_header:
                        continue
                    assert self.currBook is None
                    assert len(self.books) == 0
                    assert len(self.header) == 0
                    # if we haven't parsed a header yet then skip to first
                    # orderbook we can find
                    line = f.readline()
                    while line and not line.startswith('OB'):
                        line = f.readline()
                    if not line:
                        raise ValueError(
                            "RESTART before header but no order book follows")
                    fields = line.split(',')
                    assert len(fields) >= 2
                    ccy_str = fields[1]
                    self.header['ccy'] = parse_ccy(ccy_str)
                    # call the normal routine to start an orderbook
                    self.start_new_orderbook(line)
                    self.done_with_header = True
                    continue
                else:
                    print("Encountered error at line: %s" % (line,))
                    print(type(inst))  # the exception instance
                    print(inst)  # __str__ allows args to printed directly
                    print("Unrecoverable!")
                    raise
        return self.header, self.books