def __readNdxFile(self, file, dic):
    fileName = file.filename
    sizeFile = file.file_size
    fileData = self.__zipFile.read(fileName)
    bt = BytesIO(fileData)
    bt.seek(0)
    lenBytes = bt.read(2)  # record-count header
    # lenSum = self.BytesToInt(lenBytes)
    while bt.tell() < sizeFile:
        lenBytes12 = bt.read(12)
        # NULL bytes
        bytesNull = lenBytes12[0:2]
        bytesNullInt = self.BytesToInt(bytesNull)
        # FILETIME bytes
        bytesFileTime = lenBytes12[2:10]
        bytesFileTimeInt = self.BytesToInt(bytesFileTime)
        # offset into the PDT file
        bytesOffsetPdt = lenBytes12[10:12]
        bytesOffsetPdtInt = self.BytesToInt(bytesOffsetPdt)
        startTimeInt = self.FiletimeToUnixtimestamp(bytesFileTimeInt)
        if bytesOffsetPdtInt not in dic:
            dic[bytesOffsetPdtInt] = [None, None]
        dic[bytesOffsetPdtInt][1] = startTimeInt
    bt.close()

def restore_tokens(self, file: _io.BytesIO):
    self.tokens.clear()
    while True:
        flag = int.from_bytes(file.read(1), "big")
        if flag == 0:
            self.tokens.append(stl.Token((stl.EOF, None)))
            break
        else:
            line = int(stl.read_string(file))
            file_name = stl.read_string(file)
            lf = line, file_name
            if flag == 1:
                token: stl.NumToken = stl.NumToken(lf, stl.read_string(file))
            elif flag == 2:
                token: stl.LiteralToken = stl.LiteralToken(lf, stl.read_string(file))
            elif flag == 3:
                token: stl.IdToken = stl.IdToken(lf, stl.read_string(file))
            elif flag == 4:
                token: stl.DocToken = stl.DocToken(lf, stl.read_string(file))
            else:
                raise stl.ParseException("Unknown flag: {}".format(flag))
            self.tokens.append(token)

def __readPdtFile(self, file, dic):
    fileName = file.filename
    sizeFile = file.file_size
    fileData = self.__zipFile.read(fileName)
    bt = BytesIO(fileData)
    bt.seek(int('0x01A', 0))  # skip the 26-byte PDT header
    while bt.tell() < sizeFile:
        pos = bt.tell()
        # little-endian 16-bit length of the programme name
        lenBytes = bt.read(2)
        lenBytesHex = "0x" + ''.join(["%02X" % ord(x) for x in reversed(lenBytes)])
        lenSum = int(lenBytesHex, 0)
        bytesProName = unicode(bt.read(lenSum), self.__jtvEncodeProgrammName)
        if pos not in dic:
            dic[pos] = [None, None]
        dic[pos][0] = bytesProName
    bt.close()

def decompress(frequencyTable, valueData):
    buffer = BytesIO(valueData)
    # the payload starts with a LEB128-encoded uncompressed size
    uncompressedSize = readUnsignedLeb128(buffer)
    compressedSize = len(valueData) - buffer.tell()
    # create a buffer to decompress into
    inputData = buffer.read(compressedSize)
    outputData = ctypes.create_string_buffer(uncompressedSize)
    decompDLL.decompressData(frequencyTable, inputData, compressedSize,
                             outputData, uncompressedSize)
    return BytesIO(outputData.raw)

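# The readUnsignedLeb128 helper used in these snippets is not shown here.
# Below is a minimal sketch of an unsigned LEB128 decoder over a BytesIO-like
# stream; it is an assumption about the helper's behaviour, not the original
# implementation.
def readUnsignedLeb128(stream):
    result = 0
    shift = 0
    while True:
        byte = stream.read(1)[0]      # next byte of the varint
        result |= (byte & 0x7F) << shift
        if byte & 0x80 == 0:          # a clear high bit marks the last byte
            return result
        shift += 7
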
class BaseBitcoinClient(object):
    def __init__(self, socket):
        self.socket = socket
        self.buffer = BytesIO()
        self.stop_client = False

    def close_stream(self):
        self.socket.close()

    def send_message(self, message):
        self.socket.sendall(message.to_bytes())

    def handshake(self):
        # Send a "version" message to start the handshake
        msg = msg_version()
        # See BIP 111 (https://github.com/bitcoin/bips/blob/master/bip-0111.mediawiki)
        msg.nVersion = 70011
        # If False, broadcast transactions will not be announced until a
        # filter{load,add,clear} command is received
        msg.fRelay = False
        self.send_message(msg)

    def handle_version(self, _):
        # Respond with a "verack" message to a "version" message
        msg = msg_verack()
        self.send_message(msg)

    def handle_ping(self, ping_message):
        # Respond with a "pong" message to a "ping" message
        msg = msg_pong()
        msg.nonce = ping_message.nonce
        self.send_message(msg)

    def run(self):
        while not self.stop_client:
            # Read and store the data from the socket
            data = self.socket.recv(64)
            self.buffer.write(data)
            try:
                # Go back to the beginning of the buffer
                self.buffer.seek(0)
                # Deserialize the message
                message = MsgSerializable().stream_deserialize(self.buffer)
                # Keep any bytes that follow the parsed message
                remaining = self.buffer.read()
                self.buffer = BytesIO()
                self.buffer.write(remaining)
                # Dispatch to the matching handle_* method, if any
                if message is not None:
                    handle_func_name = "handle_" + message.command.decode("utf-8")
                    handle_func = getattr(self, handle_func_name, None)
                    if handle_func:
                        handle_func(message)
            except SerializationTruncationError:
                # Not enough data yet; read more from the socket
                pass

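# A minimal usage sketch for the client above. The message classes
# (msg_version, msg_verack, msg_pong, MsgSerializable, SerializationTruncationError)
# most likely come from python-bitcoinlib; treat the imports and the peer
# address below as assumptions, not part of the original snippet.
import socket

from bitcoin.messages import msg_version, msg_verack, msg_pong, MsgSerializable
from bitcoin.core.serialize import SerializationTruncationError

if __name__ == "__main__":
    sock = socket.create_connection(("127.0.0.1", 8333))  # placeholder peer
    client = BaseBitcoinClient(sock)
    client.handshake()
    try:
        client.run()
    finally:
        client.close_stream()
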
def history_tick_content(contract, date):
    response = requests.get(hist_tick_url(contract, date), stream=True)
    if response.status_code == 200:
        disposition = response.headers['Content-Disposition']
        bio = BytesIO()
        chunk_size = 2 ** 16
        with click.progressbar(response.iter_content(chunk_size),
                               label=disposition) as bar:
            for content in bar:
                bio.write(content)
        bio.seek(0)
        return bio.read()
    else:
        raise IOError(response.status_code)

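# A minimal usage sketch: download one day of tick data and save it to disk.
# The contract code and date are placeholders, and hist_tick_url is assumed to
# build the download URL as in the snippet above.
if __name__ == "__main__":
    data = history_tick_content("IF1809", "2018-08-01")  # hypothetical arguments
    with open("IF1809_2018-08-01.tick", "wb") as out:
        out.write(data)
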
def __init__(self, database, language="english", filename="english_lang_cds.txt"):
    self.database = database
    self.language = language

    # load decompression DLL
    decompDLL = ctypes.CDLL("riftdecomp.dll")

    stream = BytesIO()
    self.database.extractByNameToMemory("lang_" + language + ".cds", stream)
    # seek to start
    stream.seek(0)

    dis = LittleEndianDataInputStream(stream)
    entryCount = dis.read_int()
    # read the frequency table
    frequencyTable = stream.read(1024)
    print("entryCount:" + str(entryCount))

    # not sure what these are
    for i in range(0, entryCount):
        key = stream.read(4)
        value = readUnsignedLeb128(stream)

    f = open(filename, "w", encoding='UTF-8')
    for i in range(0, entryCount):
        compressedSize = readUnsignedLeb128(stream)
        uncompressedSize = readUnsignedLeb128(stream)
        entryData = stream.read(compressedSize)

        # create a buffer to decompress into
        outputData = ctypes.create_string_buffer(uncompressedSize)

        # call a DLL to do the actual decompression. The ASM code was too
        # complicated to reverse engineer, so it was simply moved into a DLL.
        decompDLL.decompressData(frequencyTable, entryData, compressedSize,
                                 outputData, uncompressedSize)

        # And the results are in!
        # - The first 10 bytes we don't know; they seem to be the same between files though?
        buffer = BytesIO(outputData.raw)
        buffer.read(10)
        # - Then a LEB128 with the length of the string
        strLength = readUnsignedLeb128(buffer)
        # - Then the string itself
        finalStringBytes = buffer.read(strLength)
        finalString = finalStringBytes.decode("utf-8")
        # print("doing entry: " + str(i) + ", length[" + str(strLength) + "]:" +
        #       finalString.encode(sys.stdout.encoding, errors="replace").decode(sys.stdout.encoding))
        print(finalString, file=f)
    f.close()

'''
Created on 2017-06-21

@author: admin
'''
from io import StringIO  # StringIO reads and writes str in memory
from _io import BytesIO

f = StringIO()
s = f.write('wskd fkdsa ')
print(f.getvalue())  # get the value that was written
print(s)             # write() returns the number of characters written

n = BytesIO()
n.write('中文'.encode('utf_8'))  # what is written is not a str but UTF-8 encoded bytes
# n.write('中文'.encode(encoding='utf_8', errors='strict'))
print(n)
print(n.getvalue())

data = '人闲桂花落,夜静春山空。月出惊山鸟,时鸣春涧中。'.encode('utf-8')
n = BytesIO(data)
print(n)
print(n.read())

def extractUnencryptedTelaraDB(unencryptedDBFilename, extractDirectory):
    print("Begin extracting of " + unencryptedDBFilename)

    # load decompression DLL
    decompDLL = ctypes.CDLL("riftdecomp.dll")

    conn = sqlite3.connect(unencryptedDBFilename)
    conn.row_factory = sqlite3.Row
    ds = conn.cursor()

    # DatasetID appears to be a "category" of sorts, with the datasetKey being subcategories.
    # For example, dataset 7701 has different keys for different languages.
    # Guesses at some randomly chosen dataset id contents:
    #   83   - ability formulas
    #   84   - worlds? contains NIF references
    #   111  - Scene?
    #   114  - sound bank reference
    #   4307 - profanity block?
    #   7701 - EULA

    # In test mode only the first row for each datasetId will be extracted;
    # disable it to extract more than one row per datasetId.
    ###############
    #
    # WARNING: BE AWARE THAT IF YOU DISABLE TEST MODE WITHOUT CHANGING THE SQL QUERY,
    # WARNING: YOU WILL PULL **EVERY RECORD** FROM THE DATABASE. THERE ARE 400,000+
    # WARNING: AND MOST ARE UNDER 1 KB. Your filesystem might not appreciate
    # WARNING: 400,000 1 KB files suddenly appearing.
    # WARNING: You may wish to filter the first query by a specific datasetId, e.g.:
    # WARNING: ds.execute('SELECT * from dataset where datasetId=?', (7701,))
    #
    ###############
    TEST_MODE = True
    test_mode_ids = set()

    ds.execute('SELECT * from dataset order by length(value) desc')
    while True:
        rowA = ds.fetchone()
        if rowA is None:
            break
        dsc = conn.cursor()
        dsid = rowA["datasetId"]
        dskey = rowA["datasetKey"]
        # Some entries have a "name" that can be useful for identification, but they
        # often contain funny characters, so we can't use them directly.
        dsname = rowA["name"]

        if TEST_MODE:
            if dsid in test_mode_ids:
                continue
            test_mode_ids.add(dsid)

        dsc.execute("select * from dataset_compression where datasetid = ?", (dsid,))
        freqRow = dsc.fetchone()

        valueData = rowA['value']
        frequencyTable = freqRow["frequencies"]

        buffer = BytesIO(valueData)
        uncompressedSize = readUnsignedLeb128(buffer)
        compressedSize = len(valueData) - buffer.tell()

        # create a buffer to decompress into
        inputData = buffer.read(compressedSize)
        outputData = ctypes.create_string_buffer(uncompressedSize)
        decompDLL.decompressData(frequencyTable, inputData, compressedSize,
                                 outputData, uncompressedSize)

        # write the decompressed data out as <datasetId>_<datasetKey>
        f = open(os.path.join(extractDirectory, str(dsid) + "_" + str(dskey)), "wb")
        f.write(outputData)
        f.close()
        dsc.close()
    ds.close()

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on 2018-11-13

@author: zzk
'''
# StringIO and BytesIO operate on str and bytes in memory, giving them the same
# interface as reading and writing files.
# StringIO, as the name suggests, reads and writes str in memory.
from io import StringIO
from _io import BytesIO

f = StringIO('hello\nworld\n zzk')
f2 = StringIO()
f2.write('my\nname\nis\nzzk')
print(f2.getvalue())
print('====================')

while True:
    s = f.readline()
    if s == '':
        break
    print(s.strip())

print('====================')

# BytesIO
fb = BytesIO()
fb.write('哈哈哈'.encode('utf-8'))
print(fb.getvalue())

fb2 = BytesIO(b'\xe5\x93\x88\xe5\x93\x88\xe5\x93\x88')
print(fb2.read().decode('utf-8'))
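
# A small complementary sketch (not part of the original snippets): after writing
# to a BytesIO you must seek(0) before read() returns the data, while getvalue()
# works regardless of the current stream position.
from io import BytesIO

buf = BytesIO()
buf.write('哈哈哈'.encode('utf-8'))
print(buf.read())      # b'' -- the position is at the end after writing
buf.seek(0)            # rewind to the start
print(buf.read())      # b'\xe5\x93\x88\xe5\x93\x88\xe5\x93\x88'
print(buf.getvalue())  # getvalue() ignores the current position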