def reset(self):
    """
    rewind the reader to the first request, similar to rewind in POSIX

    The request counter is zeroed and the Python-side trace file is moved
    back to offset 0; when a C reader is attached it is reset as well.
    """
    self.counter = 0
    # whence defaults to 0 (start of file), so this is an absolute seek
    self.trace_file.seek(0)

    if not self.c_reader:
        return
    c_cacheReader.reset_reader(self.c_reader)
def get_timestamp_list(self):
    """
    get a list of timestamps

    Records are pulled from the C reader with read_time_req until it returns
    a falsy value; the first element of every record is collected. Reading
    starts from wherever the C reader currently is and the reader is not
    rewound afterwards.

    :return: a list of timestamps corresponding to requests
    """
    timestamps = []
    while True:
        record = c_cacheReader.read_time_req(self.c_reader)
        if not record:
            break
        timestamps.append(record[0])
    return timestamps
def close(self):
    """
    close reader, this is used to close the c_reader,
    which will not be automatically closed

    Both the Python trace file and the C reader are released when present,
    and the corresponding attributes are set to None afterwards. Attributes
    that were never set are skipped. Any exception raised while closing is
    reported with print instead of being propagated, so this is safe to call
    during teardown.

    :return: None
    """
    # Resolve the C module through globals() once: the name may be missing
    # (the lookup below never assumed it exists), and referring to it directly
    # in the except handler would raise NameError from inside the handler.
    c_module = globals().get("c_cacheReader", None)

    try:
        trace_file = getattr(self, "trace_file", None)
        if trace_file:
            trace_file.close()
            self.trace_file = None

        c_reader = getattr(self, "c_reader", None)
        if c_reader and c_module is not None:
            c_module.close_reader(c_reader)
            self.c_reader = None
    except Exception as e:
        print("Exception during close reader: {}, ccacheReader={}".format(
            e, c_module))
def close(self):
    """
    close reader, this is used to close the c_reader,
    which will not be automatically closed

    Both the Python trace file and the C reader are released when present,
    and the corresponding attributes are set to None afterwards. Attributes
    that were never set are skipped. Any exception raised while closing is
    reported with print instead of being propagated.

    :return: None
    """
    # The C module is looked up through globals() so that a missing name
    # yields None; naming it directly (in the condition or in the except
    # handler) would raise NameError when it is not defined.
    c_module = globals().get("c_cacheReader", None)

    try:
        trace_file = getattr(self, "trace_file", None)
        if trace_file:
            trace_file.close()
            self.trace_file = None

        c_reader = getattr(self, "c_reader", None)
        if c_reader and c_module is not None:
            c_module.close_reader(c_reader)
            self.c_reader = None
    except Exception as e:
        print("Exception during close reader: {}, ccacheReader={}".format(
            e, c_module))
def test_c_reader_plain(self):
    """
    C reader on the plain-text trace: check the request count, and check
    that the first request is the same before and after a reset.
    """
    trace_path = "{}/trace.txt".format(DAT_FOLDER)
    handle = c_cacheReader.setup_reader(trace_path, 'p')

    num_req = c_cacheReader.get_num_of_req(handle)
    self.assertEqual(num_req, 113872)

    # first pass reads from the freshly opened reader, second pass re-reads
    # the same request after rewinding
    for rewind_first in (False, True):
        if rewind_first:
            c_cacheReader.reset_reader(handle)
        req = c_cacheReader.read_one_req(handle)
        self.assertEqual(int(req), 42932745)

    c_cacheReader.close_reader(handle)
def test_c_reader_csv(self):
    """
    C reader on the csv trace with string labels: check the request count,
    and check that the first request is the same before and after a reset.
    """
    csv_params = {"header": True, "delimiter": ",", "label": 5, "size": 4}
    trace_path = "{}/trace.csv".format(DAT_FOLDER)
    handle = c_cacheReader.setup_reader(trace_path, 'c', data_type='c',
                                        init_params=csv_params)

    num_req = c_cacheReader.get_num_of_req(handle)
    self.assertEqual(num_req, 113872)

    # with data_type 'c' the label comes back as a string, so compare as-is
    req = c_cacheReader.read_one_req(handle)
    self.assertEqual(req, "42932745")

    c_cacheReader.reset_reader(handle)
    req = c_cacheReader.read_one_req(handle)
    self.assertEqual(req, "42932745")

    c_cacheReader.close_reader(handle)
def test_c_reader_binary(self):
    """
    C reader in binary mode on the vscsi trace: check the request count,
    and check that the first request is the same before and after a reset.
    """
    binary_params = {"label": 6, "real_time": 7, "fmt": "<3I2H2Q"}
    trace_path = "{}/trace.vscsi".format(DAT_FOLDER)
    handle = c_cacheReader.setup_reader(trace_path, 'b', data_type='l',
                                        init_params=binary_params)

    num_req = c_cacheReader.get_num_of_req(handle)
    self.assertEqual(num_req, 113872)

    req = c_cacheReader.read_one_req(handle)
    self.assertEqual(int(req), 42932745)

    # rewinding must bring the reader back to the very same first request
    c_cacheReader.reset_reader(handle)
    req = c_cacheReader.read_one_req(handle)
    self.assertEqual(int(req), 42932745)

    c_cacheReader.close_reader(handle)
def test_c_reader_vscsi(self):
    """
    C reader in vscsi mode: check the request count, and check that the
    first request is the same before and after a reset.
    """
    trace_path = "{}/trace.vscsi".format(DAT_FOLDER)
    handle = c_cacheReader.setup_reader(trace_path, 'v')

    num_req = c_cacheReader.get_num_of_req(handle)
    self.assertEqual(num_req, 113872)

    # original note on the expected value: "+1 is to avoid block 0"
    # NOTE(review): the offset itself is not visible here -- confirm
    expected_first = 42932745
    for rewind_first in (False, True):
        if rewind_first:
            c_cacheReader.reset_reader(handle)
        req = c_cacheReader.read_one_req(handle)
        self.assertEqual(int(req), expected_first)

    c_cacheReader.close_reader(handle)
def __init__(self, file_loc, data_type='c', init_params=None,
             block_unit_size=0, disk_sector_size=0, open_c_reader=True, **kwargs):
    """
    open a csv trace for reading

    :param file_loc: location of the file
    :param data_type: type of data, can be "l" for int/long, "c" for string
    :param init_params: the init_params for opening csv; "label" is required,
                        "real_time", "size", "header" and "delimiter" are optional,
                        "size" becomes required when block_unit_size is not 0
    :param block_unit_size: block size for storage system, 0 when disabled
    :param disk_sector_size: size of disk sector
    :param open_c_reader: bool for whether open reader in C backend
    :param kwargs: only "lock" is read, it is forwarded to the base reader
    :raises AssertionError: when init_params is missing or lacks a required key
    """
    super(CsvReader, self).__init__(file_loc, data_type, block_unit_size, disk_sector_size,
                                    open_c_reader, kwargs.get("lock", None))

    # all parameter validation happens before the file is opened,
    # so a failed check cannot leak an open file handle
    assert init_params is not None, "please provide init_param for csvReader"
    assert "label" in init_params, "please provide label for csv reader"
    if block_unit_size != 0:
        assert "size" in init_params, "please provide size_column option to consider request size"

    self.trace_file = open(file_loc, 'rb')
    self.init_params = init_params
    self.label_column = init_params['label']
    # both stay None when the column is not configured
    self.time_column = init_params.get("real_time")
    self.size_column = init_params.get("size")

    # A column counts as configured only when it is neither absent (None)
    # nor the -1 sentinel; comparing against -1 alone treated a missing
    # column as present because None != -1 is always true.
    if self.time_column is not None and self.time_column != -1:
        self.support_real_time = True
    if self.size_column is not None and self.size_column != -1:
        self.support_size = True

    self.header_bool = init_params.get('header')
    self.delimiter = init_params.get('delimiter', ",")
    if "delimiter" not in init_params:
        INFO("open {} using default delimiter \",\" for CsvReader".format(file_loc))

    if self.header_bool:
        # consume the header line so that reading starts at the first record
        header_line = self.trace_file.readline().decode()
        self.headers = [i.strip(string.whitespace) for i in header_line.split(self.delimiter)]

    if ALLOW_C_MIMIRCACHE and open_c_reader:
        self.c_reader = c_cacheReader.setup_reader(file_loc, 'c', data_type=data_type,
                                                   block_unit_size=block_unit_size,
                                                   disk_sector_size=disk_sector_size,
                                                   init_params=init_params)
def __init__(self, file_loc, data_type='c', open_c_reader=True, **kwargs):
    """
    open a plain text trace for reading

    :param file_loc: location of the file
    :param data_type: type of data, can be "l" for int/long, "c" for string
    :param open_c_reader: bool for whether open reader in C backend
    :param kwargs: only "lock" is read, it is forwarded to the base reader
    """
    lock = kwargs.get("lock")
    super(PlainReader, self).__init__(file_loc, data_type,
                                      open_c_reader=open_c_reader, lock=lock)
    self.trace_file = open(file_loc, 'rb')

    # the C reader is optional: it needs both the C backend and the caller's consent
    if not (ALLOW_C_MIMIRCACHE and open_c_reader):
        return
    self.c_reader = c_cacheReader.setup_reader(file_loc, 'p',
                                               data_type=data_type,
                                               block_unit_size=0)
def test_c_reader_vscsi(self):
    """
    C reader in vscsi mode: check the request count, and check that the
    first request is the same before and after a reset.
    """
    handle = c_cacheReader.setup_reader("{}/trace.vscsi".format(DAT_FOLDER), 'v')
    self.assertEqual(c_cacheReader.get_num_of_req(handle), 113872)

    # original note on the expected value: "+1 is to avoid block 0"
    # NOTE(review): the offset itself is not visible here -- confirm
    before_reset = c_cacheReader.read_one_req(handle)
    self.assertEqual(int(before_reset), 42932745)

    c_cacheReader.reset_reader(handle)
    after_reset = c_cacheReader.read_one_req(handle)
    self.assertEqual(int(after_reset), 42932745)

    c_cacheReader.close_reader(handle)
def test_c_reader_potpourri(self):
    """
    Read the vscsi trace and the csv trace side by side and check that
    they yield the same request at every position, until either reader
    returns a falsy value.

    Both C readers are closed when the test finishes, whether it passes
    or fails, because C readers are not released automatically.
    """
    v_reader = c_cacheReader.setup_reader("{}/trace.vscsi".format(DAT_FOLDER), 'v')
    c_reader = None
    try:
        c_reader = c_cacheReader.setup_reader("{}/trace.csv".format(DAT_FOLDER), 'c',
                                              data_type='l',
                                              init_params={"header": True, "delimiter": ",",
                                                           "label": 5, "size": 4})
        e1 = c_cacheReader.read_one_req(v_reader)
        e2 = c_cacheReader.read_one_req(c_reader)
        while e1 and e2:
            self.assertEqual(e1, e2)
            e1 = c_cacheReader.read_one_req(v_reader)
            e2 = c_cacheReader.read_one_req(c_reader)
    finally:
        # release whichever readers were successfully opened
        c_cacheReader.close_reader(v_reader)
        if c_reader is not None:
            c_cacheReader.close_reader(c_reader)
def test_c_reader_csv(self):
    """
    C reader on the csv trace with string labels: check the request count,
    and check that the first request is the same before and after a reset.
    """
    handle = c_cacheReader.setup_reader(
        "{}/trace.csv".format(DAT_FOLDER), 'c', data_type='c',
        init_params={"header": True, "delimiter": ",", "label": 5, "size": 4})
    self.assertEqual(c_cacheReader.get_num_of_req(handle), 113872)

    # with data_type 'c' the label comes back as a string, so compare as-is
    for rewind_first in (False, True):
        if rewind_first:
            c_cacheReader.reset_reader(handle)
        req = c_cacheReader.read_one_req(handle)
        self.assertEqual(req, "42932745")

    c_cacheReader.close_reader(handle)
def test_c_reader_binary(self):
    """
    C reader in binary mode on the vscsi trace: check the request count,
    and check that the first request is the same before and after a reset.
    """
    handle = c_cacheReader.setup_reader(
        "{}/trace.vscsi".format(DAT_FOLDER), 'b', data_type='l',
        init_params={"label": 6, "real_time": 7, "fmt": "<3I2H2Q"})
    self.assertEqual(c_cacheReader.get_num_of_req(handle), 113872)

    # first pass reads from the freshly opened reader, second pass re-reads
    # the same request after rewinding
    for rewind_first in (False, True):
        if rewind_first:
            c_cacheReader.reset_reader(handle)
        req = c_cacheReader.read_one_req(handle)
        self.assertEqual(int(req), 42932745)

    c_cacheReader.close_reader(handle)
def get_num_of_req(self):
    """
    count the number of requests in the trace

    A positive count from an earlier call is cached in self.num_of_req and
    returned directly. Otherwise the count comes from the C reader when one
    is attached, or from reading the whole trace with read_one_req.
    The reader is reset after counting, so the read position held before
    the call is not preserved.

    :return: the number of requests in the trace
    """
    if self.num_of_req > 0:
        return self.num_of_req

    if self.c_reader:
        count = c_cacheReader.get_num_of_req(self.c_reader)
    else:
        # Rewind first: counting from the current position would miss every
        # request that has already been read and cache a too-small total.
        self.reset()
        # Count in a local so that an exception half way through does not
        # leave a partial value cached in self.num_of_req.
        count = 0
        while self.read_one_req() is not None:
            count += 1

    self.num_of_req = count
    self.reset()
    return self.num_of_req
def test_c_reader_potpourri(self):
    """
    Read the vscsi trace and the csv trace side by side and check that
    they yield the same request at every position, until either reader
    returns a falsy value.

    Both C readers are closed when the test finishes, whether it passes
    or fails, because C readers are not released automatically.
    """
    v_reader = c_cacheReader.setup_reader(
        "{}/trace.vscsi".format(DAT_FOLDER), 'v')
    c_reader = None
    try:
        c_reader = c_cacheReader.setup_reader(
            "{}/trace.csv".format(DAT_FOLDER),
            'c',
            data_type='l',
            init_params={
                "header": True,
                "delimiter": ",",
                "label": 5,
                "size": 4
            })
        e1 = c_cacheReader.read_one_req(v_reader)
        e2 = c_cacheReader.read_one_req(c_reader)
        while e1 and e2:
            self.assertEqual(e1, e2)
            e1 = c_cacheReader.read_one_req(v_reader)
            e2 = c_cacheReader.read_one_req(c_reader)
    finally:
        # release whichever readers were successfully opened
        c_cacheReader.close_reader(v_reader)
        if c_reader is not None:
            c_cacheReader.close_reader(c_reader)
def __init__(self, file_loc, init_params, data_type='c',
             block_unit_size=0, disk_sector_size=0, open_c_reader=True, **kwargs):
    """
    initialization function for binaryReader

    the init_params specify the parameters for opening the trace, it is a dictionary of following key-value pairs

    +------------------+--------------+---------------------+---------------------------------------------------+
    | Keyword Argument | Value Type   | Default Value       | Description                                       |
    +==================+==============+=====================+===================================================+
    | label            | int          | this is required    | the column of the label of the request            |
    +------------------+--------------+---------------------+---------------------------------------------------+
    | fmt              | string       | this is required    | fmt string of binary data, same as python struct  |
    +------------------+--------------+---------------------+---------------------------------------------------+
    | real_time        | int          | NA                  | the column of real time                           |
    +------------------+--------------+---------------------+---------------------------------------------------+
    | op               | int          | NA                  | the column of operation (read/write)              |
    +------------------+--------------+---------------------+---------------------------------------------------+
    | size             | int          | NA                  | the column of block/request size                  |
    +------------------+--------------+---------------------+---------------------------------------------------+

    :param file_loc: location of the file
    :param init_params: init_params for binaryReader, see above
    :param data_type: type of data(label), can be "l" for int/long, "c" for string
    :param block_unit_size: block size for storage system, 0 when disabled
    :param disk_sector_size: size of disk sector
    :param open_c_reader: whether open c reader
    :param kwargs: only "lock" is read, it is forwarded to the base reader
    :raises AssertionError: when a required key is missing from init_params or
                            the file size is not a multiple of the record size
    """
    super(BinaryReader, self).__init__(file_loc, data_type, block_unit_size, disk_sector_size,
                                       open_c_reader, kwargs.get("lock", None))

    assert 'fmt' in init_params, "please provide format string(fmt) in init_params"
    assert "label" in init_params, "please specify the order of label, beginning from 1"
    if block_unit_size != 0:
        assert "size" in init_params, "please provide size option to consider request size"

    self.init_params = init_params
    self.fmt = init_params['fmt']
    # this number begins from 1, so need to reduce by one before use
    self.label_column = init_params['label']
    # both stay None when the column is not configured
    self.time_column = init_params.get("real_time")
    self.size_column = init_params.get("size")

    # The record layout and the file size are checked before the file is
    # opened, so a bad fmt or a truncated trace cannot leak an open handle.
    self.struct_instance = struct.Struct(self.fmt)
    self.record_size = struct.calcsize(self.fmt)
    self.trace_file_size = os.path.getsize(self.file_loc)
    assert self.trace_file_size % self.record_size == 0, \
        "file size ({}) is not multiple of record size ({})".format(self.trace_file_size, self.record_size)

    self.trace_file = open(file_loc, 'rb')

    # A column counts as configured only when it is neither absent (None)
    # nor the -1 sentinel; comparing against -1 alone treated a missing
    # column as present because None != -1 is always true.
    if self.time_column is not None and self.time_column != -1:
        self.support_real_time = True
    if self.size_column is not None and self.size_column != -1:
        self.support_size = True

    if ALLOW_C_MIMIRCACHE and open_c_reader:
        # the data type here is not real data type, it will auto correct in C
        self.c_reader = c_cacheReader.setup_reader(file_loc, 'b', data_type=self.data_type,
                                                   block_unit_size=block_unit_size,
                                                   disk_sector_size=disk_sector_size,
                                                   init_params=init_params)
    # count the requests once up front
    self.get_num_of_req()
def __init__(self,
             file_loc,
             data_type='c',
             init_params=None,
             block_unit_size=0,
             disk_sector_size=0,
             open_c_reader=True,
             **kwargs):
    """
    open a csv trace for reading

    :param file_loc: location of the file
    :param data_type: type of data, can be "l" for int/long, "c" for string
    :param init_params: the init_params for opening csv; "label" is required,
                        "real_time", "size", "header" and "delimiter" are optional,
                        "size" becomes required when block_unit_size is not 0
    :param block_unit_size: block size for storage system, 0 when disabled
    :param disk_sector_size: size of disk sector
    :param open_c_reader: bool for whether open reader in C backend
    :param kwargs: only "lock" is read, it is forwarded to the base reader
    :raises AssertionError: when init_params is missing or lacks a required key
    """
    super(CsvReader, self).__init__(file_loc, data_type, block_unit_size,
                                    disk_sector_size, open_c_reader,
                                    kwargs.get("lock", None))

    # all parameter validation happens before the file is opened,
    # so a failed check cannot leak an open file handle
    assert init_params is not None, "please provide init_param for csvReader"
    assert "label" in init_params, "please provide label for csv reader"
    if block_unit_size != 0:
        assert "size" in init_params, "please provide size_column option to consider request size"

    self.trace_file = open(file_loc, 'rb')
    self.init_params = init_params
    self.label_column = init_params['label']
    # both stay None when the column is not configured
    self.time_column = init_params.get("real_time")
    self.size_column = init_params.get("size")

    # A column counts as configured only when it is neither absent (None)
    # nor the -1 sentinel; comparing against -1 alone treated a missing
    # column as present because None != -1 is always true.
    if self.time_column is not None and self.time_column != -1:
        self.support_real_time = True
    if self.size_column is not None and self.size_column != -1:
        self.support_size = True

    self.header_bool = init_params.get('header')
    self.delimiter = init_params.get('delimiter', ",")
    if "delimiter" not in init_params:
        INFO("open {} using default delimiter \",\" for CsvReader".format(
            file_loc))

    if self.header_bool:
        # consume the header line so that reading starts at the first record
        header_line = self.trace_file.readline().decode()
        self.headers = [
            i.strip(string.whitespace)
            for i in header_line.split(self.delimiter)
        ]

    if ALLOW_C_MIMIRCACHE and open_c_reader:
        self.c_reader = c_cacheReader.setup_reader(
            file_loc,
            'c',
            data_type=data_type,
            block_unit_size=block_unit_size,
            disk_sector_size=disk_sector_size,
            init_params=init_params)