Example #1
0
 def write_summary(self):
     """Finish the parse summary for the current output file and close it.

     Reports the data start/stop timestamps, zero irrelevant records and the
     number of lines processed (last line index + 1) to the ParseManager,
     marks the parse as stopped with a "good" status, closes the HDF5 file
     and finally asks ParseManager to summarize the file at the output path.
     """
     manager = self.__parse_manager

     # Time range covered by the data written to this file
     manager.data_start(self.__data_start_timestamp)
     manager.data_stop(self.__current_timestamp)

     # Counters: nothing is tracked as irrelevant here; line numbers are
     # zero-based, hence the +1
     manager.irrelevants(0)
     lines_processed = self.__line_number + 1
     manager.processed(lines_processed)

     manager.mark_stop(True)

     # The file must be closed before it is summarized by path
     self.__h5_file.close()
     ParseManager.summarize_file(self.__output_path)
Example #2
0
    def advance_date(self, new_date):
        if self.__h5_file:
            self.write_summary()

        self.__output_path = CME_OUT_PATH / get_date_string(new_date)
        if not self.__output_path.parent.exists():
            os.makedirs(self.__output_path.parent)
        print "OUT", self.__output_path
        self.__h5_file = openFile(self.__output_path, mode="w", title="CME Fix Data")
        self.__parse_manager = ParseManager(self.__current_input_path, self.__h5_file)
        self.__parse_manager.mark_start()
        self.__prior_day_books = {}
        self.__data_start_timestamp = 0
        self.__current_timestamp = 0
        for symbol, builder in self.__book_builders.items():
            self.__prior_day_books[symbol] = (builder.bid_book, builder.ask_book)
        self.__book_builders = {}
Example #3
0
    def parse(self, build_book = True, force = False, stop_early_at_hit=0):
        """
        Parse the input file. There are two modes: build_book=True and
        build_book=False. If build_book=False, the h5 file is simply the same
        record data from the gz file, but stored as hdf5. If build_book=True,
        the hdf5 file created has book data for all matching inputs. Each
        symbol gets its own dataset.

        The ParseManager is used to store summary information for the parse of
        this data.

        Arguments:
          build_book        -- True to feed matching records to build_books();
                               False to copy them into the '_AMD_Data_/records'
                               table of an "_AMD_.h5" output file.
          force             -- when False and the output file already exists,
                               return immediately without parsing.
          stop_early_at_hit -- if non-zero, stop reading once this many
                               matching records have been handled.

        Returns None. The summary's data_stop is the timestamp of the last
        Add/Delete/Modify record read (None if there was none); data_start is
        the timestamp of the first matching record.
        """
        self.__output_path = self.__output_base + (build_book and ".h5" or "_AMD_.h5")
        logging.info("Parsing file %s\n\tto create %s"% (self.__input_path, self.__output_path))
        if self.__output_path.exists() and not force:
            return
        if not self.__output_path.parent.exists():
            os.makedirs(self.__output_path.parent)
        self.__h5_file = openFile(self.__output_path, mode = "w", title = "ARCA Equity Data")
        if not build_book:
            ## If not building book, then just writing out AMD data as hdf5
            filters = Filters(complevel=1, complib='zlib')
            group = self.__h5_file.createGroup("/", '_AMD_Data_', 'Add-Modify-Delete data')
            table = self.__h5_file.createTable(group, 'records', ArcaRecord,
                                               "Data for "+str(self.__date), filters=filters)
            h5Record = table.row

        self.__parse_manager = ParseManager(self.__input_path, self.__h5_file)
        self.__parse_manager.mark_start()

        hit_count = 0
        data_start_timestamp = None
        # Last successfully built record; kept separately from the loop's
        # `record` so a trailing skipped line cannot leave it as None.
        last_record = None
        # -1 so that an empty input reports 0 processed lines instead of
        # failing on an attribute that was never assigned.
        self.__line_number = -1
        field_splitter = re.compile(r'\s*,\s*')

        input_file = gzip.open(self.__input_path, 'rb')
        try:
            for self.__line_number, line in enumerate(input_file):

                if stop_early_at_hit and hit_count == stop_early_at_hit:
                    break

                ###################################################
                # Show progress periodically
                ###################################################
                if 0 == (self.__line_number % 1000000):
                    logging.info("At %d hit count is %d on %s" %
                                 (self.__line_number, hit_count,
                                  (self.__symbols and
                                   self.__symbols or "*")))

                fields = field_splitter.split(line)
                code = fields[0]
                if code == 'A':
                    record = AddRecord(fields, self.__start_of_date)
                elif code == 'D':
                    record = DeleteRecord(fields, self.__start_of_date)
                elif code == 'M':
                    record = ModifyRecord(fields, self.__start_of_date)
                else:
                    # 'I', 'V' and any unrecognized record types are ignored
                    continue
                last_record = record

                # When a symbol filter is set, only matching records count
                if self.__symbols and (not record.symbol in self.__symbols):
                    continue

                hit_count += 1

                # record the timestamp of the first record as data_start
                if not data_start_timestamp:
                    data_start_timestamp = record.timestamp

                if build_book:
                    self.build_books(record)
                else:
                    h5Record['ts'] = record.timestamp
                    h5Record['asc_ts'] = chicago_time_str(record.timestamp)
                    h5Record['symbol'] = record.symbol
                    h5Record['seq_num'] = record.seq_num
                    h5Record['order_id'] = record.order_id
                    h5Record['record_type'] = code
                    h5Record['buy_sell'] = (record.is_buy and 'B' or 'S')
                    # Delete records carry no price/quantity
                    if code != 'D':
                        h5Record['price'] = record.price
                        h5Record['quantity'] = record.quantity

                    h5Record.append()

                    if 0 == hit_count % __FLUSH_FREQ__:
                        table.flush()
        finally:
            # Explicit close (rather than `with`) keeps this usable on
            # interpreters where GzipFile is not a context manager.
            input_file.close()

        books_good = True
        total_unchanged = 0
        for symbol, builder in self.__book_builders.iteritems():
            books_good = books_good and builder.summary()
            total_unchanged += builder.unchanged

        data_stop_timestamp = None
        if last_record is not None:
            data_stop_timestamp = last_record.timestamp

        ############################################################
        # Finish filling in the parse summary info and close up
        ############################################################
        self.__parse_manager.data_start(data_start_timestamp)
        self.__parse_manager.data_stop(data_stop_timestamp)
        self.__parse_manager.irrelevants(total_unchanged)
        self.__parse_manager.processed(self.__line_number+1)
        self.__parse_manager.mark_stop(books_good)
        self.__h5_file.close()
        ParseManager.summarize_file(self.__output_path)
Example #4
0
class CmeRlcParser(object):
    r"""

"""

    readable(input_paths=None) 

    match_all = re.compile(".*")

    def __init__(self, input_path_list):
        """
        """
        self.__input_path_list = copy(input_path_list)
        self.__book_builders = {}
        self.__h5_file = None
        self.__ts = None
        self.__chi_ts = None
        self.__data_start_timestamp = 0
        self.__current_timestamp = None
        self.__output_path = None
        self.__prior_day_books = {}

    def write_summary(self):
        ############################################################
        # Finish filling in the parse summary info and close up
        ############################################################
        self.__parse_manager.data_start(self.__data_start_timestamp)
        self.__parse_manager.data_stop(self.__current_timestamp)
        self.__parse_manager.irrelevants(0)
        self.__parse_manager.processed(self.__line_number+1)
        self.__parse_manager.mark_stop(True)
        self.__h5_file.close()
        ParseManager.summarize_file(self.__output_path)

    def advance_date(self, new_date):
        if self.__h5_file:
            self.write_summary()

        self.__output_path = CME_OUT_PATH / get_date_string(new_date)
        if not self.__output_path.parent.exists():
            os.makedirs(self.__output_path.parent)
        print "OUT", self.__output_path
        self.__h5_file = openFile(self.__output_path, mode="w", title="CME Fix Data")
        self.__parse_manager = ParseManager(self.__current_input_path, self.__h5_file)
        self.__parse_manager.mark_start()
        self.__prior_day_books = {}
        self.__data_start_timestamp = 0
        self.__current_timestamp = 0
        for symbol, builder in self.__book_builders.items():
            self.__prior_day_books[symbol] = (builder.bid_book, builder.ask_book)
        self.__book_builders = {}


    def build_books(self, record):
        try:
            symbol = record.symbol
            builder = self.__book_builders.get(symbol)
            if not builder:
                builder = CmeRlcBookBuilder(symbol, self.__h5_file, 
                                            self.__prior_day_books.get(symbol, None),
                                            include_trades = True)
                self.__book_builders[symbol] = builder


            if record.is_book_message():
                builder.process_record(record)
            elif record.is_trade_message():
                builder.write_trade(record)

        except Exception,e:
            print traceback.format_exc()
            self.__parse_manager.warning(self.__current_file + ':' + e.message, 
                                         'G', self.__current_timestamp,
                                         self.__line_number+1)
Example #5
0
class CmeFixParser(object):
    r"""

"""

    readable(input_paths=None) 

    match_all = re.compile(".*")

    def __init__(self, input_paths):
        """
        """
        self.__input_paths = input_paths
        self.__book_builders = {}
        self.__prior_day_books = {}
        self.__h5_file = None
        self.__ts = 0
        self.__chi_ts = None
        self.__data_start_timestamp = 0
        self.__output_path = None

    def write_summary(self):
        ############################################################
        # Finish filling in the parse summary info and close up
        ############################################################
        self.__parse_manager.data_start(self.__data_start_timestamp)
        self.__parse_manager.data_stop(self.__ts)
        self.__parse_manager.irrelevants(0)
        self.__parse_manager.processed(self.__line_number+1)
        self.__parse_manager.mark_stop(True)
        self.__h5_file.close()
        ParseManager.summarize_file(self.__output_path)

    def advance_date(self, new_date):
        if self.__h5_file:
            self.write_summary()

        self.__output_path = CME_OUT_PATH / get_date_string(new_date)
        if not self.__output_path.parent.exists():
            os.makedirs(self.__output_path.parent)
        print "OUT", self.__output_path
        self.__h5_file = openFile(self.__output_path, mode="w", title="CME Fix Data")
        self.__parse_manager = ParseManager(self.__current_input_path, self.__h5_file)
        self.__parse_manager.mark_start()
        self.__prior_day_books = {}
        self.__data_start_timestamp = 0
        self.__ts = 0
        for symbol, builder in self.__book_builders.items():
            self.__prior_day_books[symbol] = (builder.bid_book, builder.ask_book)
        self.__book_builders = {}

    def build_books(self, msg):
        try:
            ts = timestamp_from_cme_timestamp(msg.sending_time)

            if 0 == self.__line_number % 100000:
                print "st:", msg.sending_time, "vs", ts, "vs", chicago_time_str(ts)

            if self.__ts:
                if ts < self.__ts:
                    print "At", self.__line_number+1, "of", self.__current_file, \
                        "previous ts:", self.__chi_ts, "new:", chi_ts, \
                        "Current Message:", pprint.pformat(msg)
                    assert False, "Timestamps going backward"

            self.__ts = ts
            self.__chi_ts = chicago_time_str(self.__ts)

            if 0 == self.__data_start_timestamp:
                self.__data_start_timestamp = self.__ts
            affected_builders = sets.Set()
            for update in msg.entries:
                symbol = update[SecurityDesc]
                builder = self.__book_builders.get(symbol, None)
                if not builder:
                    builder = CmeBookBuilder(symbol, self.__h5_file, 
                                             self.__prior_day_books.get(symbol, None),
                                             include_trades = True)
                    self.__book_builders[symbol] = builder

                if not update[MDEntryType] in __BOOK_ENTRY_TYPES__:
                    continue
        
                builder.process_record(update, self.__ts, self.__chi_ts, msg.msg_seq_num)
                affected_builders.add(builder)

            for builder in affected_builders:
                top_bid = builder.top_bid()
                top_ask = builder.top_ask()
                if top_bid and top_ask:
                    if top_bid == top_ask:
                        warning_msg = builder.symbol + ': Locked (%s, %s)'%(top_bid, top_ask)
                        print warning_msg
                        self.__parse_manager.warning(warning_msg, 'L', self.__ts, self.__line_number+1)
                    elif top_bid > top_ask:
                        warning_msg = builder.symbol + ': Crossed (%s, %s)'%(top_bid, top_ask)
                        print warning_msg
                        self.__parse_manager.warning(warning_msg, 'C', self.__ts, self.__line_number+1)
                    if not builder.write_record(self.__ts, self.__chi_ts, msg.msg_seq_num):
                        #print "Msg no book change", msg.line
                        pass

        except Exception,e:
            print traceback.format_exc()
            self.__parse_manager.warning(self.__current_file + ':' + e.message, 
                                         'G', self.__ts,
                                         self.__line_number+1)
Example #6
0
    table.sortby = "Date"
    table.align["Date"] = "l"
    for f in outfiles:
        d = get_date_of_file(f)
        if not d:
            print "Warning: found cme output file with no date", f
            continue
        dup = outfile_map.get(d)
        if dup:
            print "Warning: dup for date", d
        outfile_map[d] = f
        record = fileset.date_map.get(d)
        if not record:
            print "Could not find record for date", d
        else:
            try:
                summary = ParseManager.get_summary_record(f)
                datestr = str(d) + d.strftime(" (%A)")
                if summary:
                    table.add_row([datestr, record['type'], "Valid" if summary['is_valid'] else "Invalid",
                                   chicago_time(summary['data_start']), 
                                   chicago_time(summary['data_stop']),
                                   '?'])
                else:
                    table.add_row([datestr, record['type'], "No Summary", "", "", ""])
            except Exception, e:
                print "Caught exception:", e
                table.add_row([d, record['type'], "No Summary", "", "", ""])
    print table
    print "Num rows:", table.rowcount
Example #7
0
    def parse(self, build_book = True, force = False, stop_early_at_hit=0):
        """
        Parse the input file. There are two modes: build_book=True and
        build_book=False. If build_book=False, the h5 file is simply the same
        record data from the gz file, but stored as hdf5. If build_book=True,
        the hdf5 file created has book data for all matching inputs. Each
        symbol gets its own dataset.

        The input is decompressed by an external `gzip -d -c` process and
        pre-filtered by `sed` so that only lines mentioning one of the
        requested symbols reach Python. When no symbols are set, sed passes
        every line through.

        The ParseManager is used to store summary information for the parse of
        this data.

        Arguments:
          build_book        -- True to feed matching records to build_books();
                               False to copy them into the '_AMD_Data_/records'
                               table of an "_AMD_.h5" output file.
          force             -- when False and the output file already exists,
                               print an error and return without parsing.
          stop_early_at_hit -- if non-zero, stop reading once this many
                               matching records have been handled.

        Returns None. The summary's data_stop is the timestamp of the last
        Add/Delete/Modify record read (None if there was none); the processed
        count is the number of lines that came out of the sed filter.
        """
        self.__output_path = self.__output_base + (build_book and ".h5" or "_AMD_.h5")
        logging.info("Parsing file %s\n\tto create %s"% (self.__input_path, self.__output_path))
        if self.__output_path.exists() and not force:
            print 'Error: output file already exists. Use --force to overwrite.'
            return
        if not self.__output_path.parent.exists():
            os.makedirs(self.__output_path.parent)
        self.__h5_file = openFile(bytes(self.__output_path), mode = "w", title = "ARCA Equity Data")
        if not build_book:
            ## If not building book, then just writing out AMD data as hdf5
            filters = Filters(complevel=1, complib='zlib')
            group = self.__h5_file.createGroup("/", '_AMD_Data_', 'Add-Modify-Delete data')
            table = self.__h5_file.createTable(group, 'records', ArcaRecord,
                                               "Data for "+str(self.__date), filters=filters)
            h5Record = table.row

        self.__parse_manager = ParseManager(self.__input_path, self.__h5_file)
        self.__parse_manager.mark_start()

        hit_count = 0
        data_start_timestamp = None
        # Last successfully built record; kept separately from the loop's
        # `record` so a trailing skipped line cannot leave it as None.
        last_record = None
        # -1 so that an empty stream reports 0 processed lines instead of
        # failing on an attribute that was never assigned.
        self.__line_number = -1

        if self.__symbols:
            # Single-character symbols are wrapped in commas so they only
            # match as a whole csv field rather than anywhere in the line.
            symbols_cleaned = map(lambda s: s if len(s) > 1 else ','+s+',', self.__symbols)
            sed_script = r'/\(' + string.join(symbols_cleaned, r'\|') + r'\)/p'
        else:
            # No symbol filter: print every line
            sed_script = 'p'

        unzip = subprocess.Popen(['gzip','-d','-c', self.__input_path],
                                 stdout=subprocess.PIPE,
                                 bufsize=-1)
        sed = subprocess.Popen(['sed','-n',sed_script],
                               stdin=unzip.stdout,
                               stdout=subprocess.PIPE,
                               bufsize=-1)
        # sed now owns the read end of gzip's pipe; drop our copy so gzip
        # sees a broken pipe (and exits) if sed goes away first.
        unzip.stdout.close()

        print "input path: {}".format(self.__input_path)

        infile = csv.reader(iter(sed.stdout.readline, ''))

        try:
            for self.__line_number, fields in enumerate(infile):
                if stop_early_at_hit and hit_count == stop_early_at_hit:
                    break

                ###################################################
                # Show progress periodically
                ###################################################
                if 0 == (self.__line_number % 1000000):
                    logging.info("At %d hit count is %d on %s" %
                                 (self.__line_number, hit_count,
                                  (self.__symbols and
                                   self.__symbols or "*")))

                # csv yields an empty list for a blank line
                if not fields:
                    continue

                code = fields[0]
                if code == 'A':
                    record = AddRecord(fields, self.__start_of_date)
                elif code == 'D':
                    record = DeleteRecord(fields, self.__start_of_date)
                elif code == 'M':
                    record = ModifyRecord(fields, self.__start_of_date)
                else:
                    # 'I', 'V' and any unrecognized record types are ignored
                    continue
                last_record = record

                # sed only pre-filters on text; confirm the symbol field itself
                if self.__symbols and (not record.symbol in self.__symbols):
                    continue

                hit_count += 1

                # record the timestamp of the first record as data_start
                if not data_start_timestamp:
                    data_start_timestamp = record.timestamp

                if build_book:
                    self.build_books(record)
                else:
                    h5Record['ts'] = record.timestamp
                    h5Record['asc_ts'] = chicago_time_str(record.timestamp)
                    h5Record['symbol'] = record.symbol
                    h5Record['seq_num'] = record.seq_num
                    h5Record['order_id'] = record.order_id
                    h5Record['record_type'] = code
                    h5Record['buy_sell'] = (record.is_buy and 'B' or 'S')
                    # Delete records carry no price/quantity
                    if code != 'D':
                        h5Record['price'] = record.price
                        h5Record['quantity'] = record.quantity

                    h5Record.append()

                    if 0 == hit_count % __FLUSH_FREQ__:
                        table.flush()
        finally:
            # Close our end of the pipe before waiting: after an early break
            # sed may be blocked writing to a full pipe nobody reads, and
            # waiting on it first would hang forever.
            sed.stdout.close()
            sed.wait()
            # Reap gzip as well so it does not linger as a zombie
            unzip.wait()

        books_good = True
        total_unchanged = 0
        for symbol, builder in self.__book_builders.iteritems():
            books_good = books_good and builder.summary()
            total_unchanged += builder.unchanged

        data_stop_timestamp = None
        if last_record is not None:
            data_stop_timestamp = last_record.timestamp

        ############################################################
        # Finish filling in the parse summary info and close up
        ############################################################
        self.__parse_manager.data_start(data_start_timestamp)
        self.__parse_manager.data_stop(data_stop_timestamp)
        self.__parse_manager.irrelevants(total_unchanged)
        self.__parse_manager.processed(self.__line_number+1)
        self.__parse_manager.mark_stop(books_good)
        self.__h5_file.close()
        ParseManager.summarize_file(bytes(self.__output_path))