def make_db(self, scan_path, out_file):
    """Scan ``scan_path`` and write a header + dict + data file to ``out_file``.

    The output is laid out as: packed header, packed dict, then the packed
    data records back to back. The offsets registered in the dict via
    ``set_addr`` are relative to the start of the data section (the first
    record is at offset 0), not to the start of the file.

    Args:
        scan_path: Root directory handed to the scanner.
        out_file: Path of the database file to create (overwritten).
    """
    print("make_db")
    print(scan_path)
    print(out_file)

    header = Header()
    fdict = Dict()
    fdata = Data()

    self.__start_scan(scan_path, fdata)
    print(len(fdata.records))

    #--- make dict
    for record in fdata.records:
        fdict.append_record(record.fid, record.pid)
    print(len(fdict.records_map))

    #--- update header data
    header.dict_start = Header.SIZE
    header.dict_size = fdict.get_bdata_size()
    header.data_start = Header.SIZE + header.dict_size

    #--- pack records and register each one's offset/length in the dict
    # Chunks are collected in a list and joined once: repeated ``bytes +=``
    # copies the whole buffer on every iteration (quadratic in total size).
    packed_records = []
    fdata_start = 0
    for record in fdata.records:
        rb = record.pack()
        #--- update dict addr
        fdict.set_addr(record.fid, fdata_start, len(rb))
        packed_records.append(rb)
        fdata_start += len(rb)
    fdata_bin = b"".join(packed_records)
    header.data_size = len(fdata_bin)

    #--- make binary
    # Everything is packed before the file is opened, so a packing error
    # never leaves a truncated output file behind.
    bdata = b"".join([header.pack(), fdict.pack(), fdata_bin])

    #--- write file
    with open(out_file, "wb") as fd:
        fd.write(bdata)
class Writer(object):
    """Scan a directory tree into a ``Data`` store and dump it gzip-compressed.

    The output file is a gzip stream containing the packed header followed
    by the packed data records.
    """

    def __init__(self, scan_path, out_file):
        """Remember the paths and create an empty header and data store."""
        self.scan_path = scan_path
        self.out_file = out_file
        self.header = Header()
        self.fdata = Data()

    def start(self):
        """Scan ``self.scan_path``, fill in the header and write ``self.out_file``."""
        timer = ETimer()
        # Root entries get parent id 0; ids are handed out starting at 1.
        Writer.rescan(self.scan_path, self.fdata, 0, 1)
        timer.elapsed("finish scan")

        print("files: ", len(self.fdata.records))

        payload = self.fdata.pack()

        hdr = self.header
        hdr.data_start = Header.SIZE
        hdr.data_size = len(payload)
        hdr.total_records = len(self.fdata.records)
        hdr.print_header()

        packed_header = hdr.pack()

        #--- start write
        with gzip.open(self.out_file, "wb") as out:
            out.write(packed_header)
            out.write(payload)

    @staticmethod
    def rescan(scan_path, fdata, parent_id, last_id):
        """Recursively register every file and directory below ``scan_path``.

        Args:
            scan_path: Directory to list.
            fdata: Sink exposing ``append_file`` / ``append_dir``; the record
                returned by ``append_dir`` must expose ``fid``.
            parent_id: Id recorded as the parent of the direct children.
            last_id: First id to hand out.

        Returns:
            The next unused id after the whole subtree has been registered.
        """
        next_id = last_id
        for entry in os.listdir(scan_path):
            entry_path = os.path.join(scan_path, entry)

            if os.path.isfile(entry_path):
                fdata.append_file(entry, parent_id, next_id, os.stat(entry_path))
                next_id += 1
                continue

            if not os.path.isdir(entry_path):
                # Neither a regular file nor a directory: ignored.
                continue

            dir_record = fdata.append_dir(
                entry, parent_id, next_id, os.stat(entry_path)
            )
            # The directory consumed one id; its subtree continues from there.
            next_id = Writer.rescan(entry_path, fdata, dir_record.fid, next_id + 1)
        return next_id
class Store(object):
    """Builds and reads a file-tree database.

    File layout produced by ``make_db``: a fixed-size header
    (``Header.SIZE`` bytes), the packed data records, then the packed dict
    that maps each record id to its absolute file offset and length.
    """

    def __init__(self):
        self.header = Header()
        self.fdata = Data()
        self.fdict = Dict()
        # Output file handle; only set while ``make_db`` is running.
        self.__fd = None

    def make_db(self, scan_path, out_file):
        """Scan ``scan_path`` and write the database to ``out_file``.

        Records are streamed to the file while scanning; the dict is
        appended afterwards and the header is written last, once all
        section offsets and sizes are known.

        Args:
            scan_path: Root directory to scan.
            out_file: Path of the database file to create (overwritten).
        """
        print("make_db")
        print(scan_path)
        print(out_file)

        # ``with`` guarantees the handle is closed even if scanning or
        # packing raises; ``finally`` keeps ``self.__fd`` from dangling.
        with open(out_file, "wb") as fd:
            self.__fd = fd
            try:
                # Leave room for the header, which is written at the end.
                fd.seek(Header.SIZE)

                #--- write files data
                self.__start_scan(scan_path, fd, self.fdict)

                end_of_data_addr = fd.tell()
                print("data writed, cur pos: ", fd.tell())

                bin_fdict = self.fdict.pack()
                fd.write(bin_fdict)

                self.header.data_start = Header.SIZE
                self.header.data_size = end_of_data_addr - Header.SIZE
                self.header.dict_start = end_of_data_addr
                self.header.dict_size = self.fdict.get_bdata_size()
                print(self.header)

                bin_header = self.header.pack()
                fd.seek(0)
                fd.write(bin_header)
            finally:
                self.__fd = None

    def __start_scan(self, scan_path, fd, fdict: Dict):
        """Start the recursive scan at ``scan_path`` with parent id 0."""
        Store.rescan(scan_path, fd, fdict, self.fdata, 0)

    @staticmethod
    def rescan(spath, fd, fdict: Dict, fdata: Data, parent_id):
        """Recursively write a record for every file and directory in ``spath``.

        Each record is packed, indexed in ``fdict`` under its current file
        offset and length, and written to ``fd``. Entries that are neither
        a regular file nor a directory (e.g. broken symlinks, sockets) are
        skipped: they cannot be listed and would make ``os.listdir`` raise.

        Args:
            spath: Directory to list.
            fd: Binary file object, positioned where the next record goes.
            fdict: Index receiving ``(fid, pid, offset, size)`` entries.
            fdata: Record factory exposing ``append_file`` / ``append_dir``.
            parent_id: Id recorded as the parent of the direct children.
        """
        for f in os.listdir(spath):
            full_path = os.path.join(spath, f)
            if os.path.isfile(full_path):
                record = fdata.append_file(f, parent_id)
                is_dir = False
            elif os.path.isdir(full_path):
                record = fdata.append_dir(f, parent_id)
                is_dir = True
            else:
                continue

            brecord = record.pack()
            #--- make index
            curr_pos = fd.tell()
            fdict.append_record(record.fid, record.pid, curr_pos, len(brecord))
            #--- write bdata
            fd.write(brecord)

            if is_dir:
                Store.rescan(full_path, fd, fdict, fdata, record.fid)

    def read_db(self, file_path):
        """Open the database at ``file_path`` and print its tree.

        Reads the header, uses it to locate and load the dict, then prints
        every record reachable from parent id 0.
        """
        print("read_db")
        print(file_path)

        #--- open file
        with open(file_path, "rb") as fd:
            #--- header
            bheader = fd.read(Header.SIZE)
            header = Header(bheader)
            print(header)

            #--- dict
            fd.seek(header.dict_start)
            bdict = fd.read(header.dict_size)
            fdict = Dict(bdict)

            #--- data
            Store.reprint(fd, fdict, 0, 0)

    @staticmethod
    def reprint(fd, fdict, parent_id, ind):
        """Print the names of all records under ``parent_id``, tab-indented.

        Args:
            fd: Binary file object of the database, open for reading.
            fdict: Loaded dict; its ``records_map`` values expose ``pid``,
                ``fid``, ``daddr`` and ``dsize``.
            parent_id: Id whose children are printed.
            ind: Current indentation depth (number of tabs).
        """
        for r in fdict.records_map.values():
            if r.pid == parent_id:
                start = r.daddr
                size = r.dsize
                fd.seek(start)
                bfile_data = fd.read(size)
                record = DataRecord(bfile_data)
                print(ind * "\t" + record.name)
                # NOTE(review): ftype 2 is presumably the directory marker
                # (Writer.make_dir passes 2) - confirm against Data.append_dir.
                if record.ftype == 2:
                    Store.reprint(fd, fdict, r.fid, ind + 1)
class Writer(object):
    """Scan a directory tree and stream it into an indexed database file.

    File layout: a fixed-size header (``Header.SIZE`` bytes), the packed
    data records, then the packed dict mapping each record id to its
    absolute file offset and length.
    """

    def __init__(self, scan_path, out_file):
        """Remember the paths and create an empty header and dict."""
        self.scan_path = scan_path
        self.out_file = out_file
        self.header = Header()
        self.fdict = Dict()

    def start(self):
        """Scan ``self.scan_path`` and write the database to ``self.out_file``.

        Records are written while scanning; the dict follows, and the
        header is written last once all offsets and sizes are known.
        """
        # ``with`` closes the file even when scanning or packing raises.
        with open(self.out_file, "wb") as fd:
            # Leave room for the header, which is written at the end.
            fd.seek(Header.SIZE)

            etimer = ETimer()
            # Root entries get parent id 0; ids are handed out starting at 1.
            self.__start_scan(self.scan_path, fd, self.fdict, 0, 1)
            etimer.elapsed("finish scan")

            end_of_data_addr = fd.tell()
            print("data writed, cur pos: ", end_of_data_addr)
            print("files: ", self.fdict.records_count)

            etimer.elapsed("start pack dict")
            bin_fdict = self.fdict.pack()
            etimer.elapsed("finish pack dict")

            fd.write(bin_fdict)

            self.header.data_start = Header.SIZE
            self.header.data_size = end_of_data_addr - Header.SIZE
            self.header.dict_start = end_of_data_addr
            self.header.dict_size = self.fdict.get_bdata_size()
            self.header.total_records = self.fdict.records_count
            self.header.print_header()

            bin_header = self.header.pack()
            fd.seek(0)
            fd.write(bin_header)

    @dtimeit
    def __start_scan(self, scan_path, fd, fdict: Dict, parent_id, last_id):
        """Run ``rescan`` under the ``dtimeit`` decorator."""
        Writer.rescan(scan_path, fd, fdict, parent_id, last_id)

    @staticmethod
    def rescan(scan_path, fd, fdict: Dict, parent_id, last_id):
        """Recursively write a record for every file and directory found.

        Each record is packed, indexed in ``fdict`` under its current file
        offset and length, and written to ``fd``. Entries that are neither
        a regular file nor a directory are ignored.

        Args:
            scan_path: Directory to list.
            fd: Binary file object, positioned where the next record goes.
            fdict: Index receiving ``(fid, pid, offset, size)`` entries.
            parent_id: Id recorded as the parent of the direct children.
            last_id: First id to hand out.

        Returns:
            The next unused id after the whole subtree has been written.
        """
        fid = last_id
        for f in os.listdir(scan_path):
            full_path = os.path.join(scan_path, f)
            if os.path.isfile(full_path):
                make_record = Writer.make_file
                is_dir = False
            elif os.path.isdir(full_path):
                make_record = Writer.make_dir
                is_dir = True
            else:
                continue

            st = os.stat(full_path)
            record = make_record(f, parent_id, fid, st)
            brecord = record.pack()
            #--- make index
            curr_pos = fd.tell()
            fdict.append_record(record.fid, record.pid, curr_pos, len(brecord))
            #--- write bdata
            fd.write(brecord)
            fid += 1

            if is_dir:
                fid = Writer.rescan(full_path, fd, fdict, record.fid, fid)
        return fid

    @staticmethod
    def make_file(name, pid, fid, st):
        """Build a record with ``ftype`` 1 (regular file)."""
        return Writer.make_ffile(name, pid, fid, st, 1)

    @staticmethod
    def make_dir(name, pid, fid, st):
        """Build a record with ``ftype`` 2 (directory)."""
        return Writer.make_ffile(name, pid, fid, st, 2)

    @staticmethod
    def make_ffile(name, pid, fid, st, ftype):
        """Build a ``DataRecord`` from a name, ids, a stat result and a type.

        Args:
            name: Entry name (no directory part).
            pid: Parent record id.
            fid: Id of this record.
            st: ``os.stat`` result for the entry.
            ftype: Record type; ``make_file`` passes 1, ``make_dir`` passes 2.
        """
        record = DataRecord()
        record.name = name
        record.ftype = ftype
        record.pid = pid
        record.fid = fid
        record.size = st.st_size
        record.st_mode = st.st_mode
        record.st_uid = st.st_uid
        record.st_gid = st.st_gid
        record.st_ctime = st.st_ctime
        record.st_atime = st.st_atime
        record.st_mtime = st.st_mtime
        return record