def testReadLine(self):
    """ChunkedReader.readline() must return SAMPLE_CHUNKED_ANSWER line by line."""
    request_stream = StringIO(SAMPLE_CHUNKED_REQUEST)
    expected_stream = StringIO(SAMPLE_CHUNKED_ANSWER)
    reader = worker.ChunkedReader(request_stream)
    expected_lines = expected_stream.readlines()
    for expected in expected_lines:
        self.assertEqual(expected, reader.readline())
def get_text_lines(location, max_pages=5):
    """
    Return the list of text lines extracted from the PDF file at `location`.

    The text is rendered into an in-memory bytes buffer and the lines are
    read back from that buffer. At most `max_pages` pages are processed; a
    falsy `max_pages` processes every page.

    Raise PDFTextExtractionNotAllowed when the document does not allow text
    extraction. Other exceptions from opening or parsing are not caught.
    """
    text_buffer = BytesIO()
    layout_params = LAParams()

    with open(location, 'rb') as pdf_file:
        with contextlib.closing(PDFParser(pdf_file)) as parser:
            document = PDFDocument(parser)
            if not document.is_extractable:
                raise PDFTextExtractionNotAllowed(
                    'Encrypted PDF document: text extraction is not allowed')

            manager = PDFResourceManager()
            converter = TextConverter(
                manager, text_buffer, laparams=layout_params)
            with contextlib.closing(converter) as extractor:
                interpreter = PDFPageInterpreter(manager, extractor)
                numbered_pages = enumerate(PDFPage.create_pages(document), 1)
                for page_num, page in numbered_pages:
                    interpreter.process_page(page)
                    # Stop once the requested number of pages is rendered.
                    if max_pages and page_num == max_pages:
                        break

                text_buffer.seek(0)
                return text_buffer.readlines()
def _get_file(self, filepath):
    """
    Return the UTF-8 decoded contents of the file located by ``findstatic``.

    Runs the ``findstatic`` management command for `filepath` with its output
    captured in memory, takes the second output line as the path of the
    file, and reads that file.

    Raises IndexError when the command prints fewer than two lines.
    """
    out = BytesIO()
    call_command("findstatic", filepath, all=False, verbosity=0, stdout=out)
    out.seek(0)
    lines = [line.strip() for line in out.readlines()]
    # NOTE(review): the first output line is presumably a "Found ..." header,
    # which is why the path is read from index 1 -- confirm.
    # The context manager closes the handle; the original leaked it.
    with codecs.open(smart_unicode(lines[1]), "r", "utf-8") as found_file:
        return found_file.read()
def testReadLines(self):
    # "Test BZ2File.readlines()"
    # readlines() on the compressed file must match the plain-text lines.
    self.createTempFile()
    with BZ2File(self.filename) as bz2f:
        self.assertRaises(TypeError, bz2f.readlines, None)
        plain_lines = BytesIO(self.TEXT).readlines()
        decompressed_lines = bz2f.readlines()
        self.assertEqual(decompressed_lines, plain_lines)
def make_geom_file(cls, atoms, filename, spin_guess=None):
    """
    Write `atoms` in xyz format and wrap the file in a SinglefileData node.

    :param atoms: object with ``write(fileobj, format='xyz')`` and ``len()``.
    :param filename: name of the file created inside a temporary directory.
    :param spin_guess: optional ``[[spin_up_indexes], [spin_down_indexes]]``;
        atoms listed in the first list get "1" appended to their first
        column, atoms in the second list get "2".
    :return: the SinglefileData built from the written file.
    """
    # spin_guess = [[spin_up_indexes], [spin_down_indexes]]
    orig_file = BytesIO()
    atoms.write(orig_file, format='xyz')
    orig_file.seek(0)
    all_lines = orig_file.readlines()
    comment = all_lines[1]  # with newline character!
    orig_lines = all_lines[2:]

    modif_lines = []
    for i_line, line in enumerate(orig_lines):
        new_line = line
        lsp = line.split()
        if spin_guess is not None:
            if i_line in spin_guess[0]:
                new_line = lsp[0] + "1 " + " ".join(lsp[1:]) + "\n"
            if i_line in spin_guess[1]:
                new_line = lsp[0] + "2 " + " ".join(lsp[1:]) + "\n"
        modif_lines.append(new_line)

    # The atom count is re-emitted from len(atoms); the comment line is
    # copied from the original xyz output.
    final_str = "%d\n%s" % (len(atoms), comment) + "".join(modif_lines)

    tmpdir = tempfile.mkdtemp()
    try:
        file_path = tmpdir + "/" + filename
        with open(file_path, 'w') as f:
            f.write(final_str)
        aiida_f = SinglefileData(file=file_path)
    finally:
        # Remove the scratch directory even when writing or node creation
        # fails; the original leaked it on any exception.
        shutil.rmtree(tmpdir)
    return aiida_f
def test_write_int(self):
    """The writer must emit one comma-separated '%d' row per array row."""
    a = np.array([[1, 2], [3, 4]], dtype=int)
    c = BytesIO()
    self.writer.write(c, a, delimiter=',', fmt='%d')
    c.seek(0)
    lines = c.readlines()
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(lines, [b'1,2\n', b'3,4\n'])
def testIterator(self):
    # "Test iter(BZ2File)"
    # Iterating the compressed file must yield the plain-text lines.
    self.createTempFile()
    # The context manager closes the file even when the assertion fails.
    with BZ2File(self.filename) as bz2f:
        sio = BytesIO(self.TEXT)
        self.assertEqual(list(iter(bz2f)), sio.readlines())
def _unpack(self, buf):
    """Extract into a list irc messages of a tcp streams.

    Lines starting with ":" are parsed as server messages into ``self._sc``;
    all other lines are parsed as client messages into ``self._cc``. Parsed
    messages are appended to ``self._messages`` only after a client NICK or
    USER command has been seen in this buffer.

    @buf: tcp stream data
    @return: False when the buffer cannot be read, otherwise None.
    """
    try:
        f = BytesIO(buf)
        lines = f.readlines()
    except Exception:
        log.error("Failed reading tcp stream buffer")
        return False

    # Both patterns are bytes because the lines come from a BytesIO; the
    # original mixed a str pattern with bytes input, which raises TypeError
    # on Python 3.
    server_msg_re = re.compile(
        br"(^:[\w+.{}!@|()]+\x20)([a-zA-Z]+|[0-9]{3})(\x20.+)")
    # The original class was "[\x0a\0x0d]" (NUL plus the letters "x0d"), a
    # typo for LF/CR; the parameters must end with a line terminator.
    client_msg_re = re.compile(br"([a-zA-Z]+\x20)(.+[\x0a\x0d])")

    logirc = False
    for element in lines:
        if element.startswith(b":"):
            irc_server_msg = server_msg_re.findall(element)
            if irc_server_msg:
                prefix, command, params = irc_server_msg[0]
                self._sc["prefix"] = convert_to_printable(prefix.strip())
                self._sc["command"] = convert_to_printable(command.strip())
                self._sc["params"] = convert_to_printable(params.strip())
                self._sc["type"] = "server"
                if logirc:
                    self._messages.append(dict(self._sc))
        else:
            irc_client_msg = client_msg_re.findall(element)
            if irc_client_msg and irc_client_msg[0][0].strip() in self.__methods_client:
                self._cc["command"] = convert_to_printable(irc_client_msg[0][0].strip())
                # Logging starts at the first NICK/USER command.
                if self._cc["command"] in ["NICK", "USER"]:
                    logirc = True
                self._cc["params"] = convert_to_printable(irc_client_msg[0][1].strip())
                self._cc["type"] = "client"
                if logirc:
                    self._messages.append(dict(self._cc))
def test_array_float(self, xp):
    """Return the lines `xp.savetxt` writes for a 2x2 float array ('%.18e')."""
    buffer = BytesIO()
    values = xp.array([[1, 2], [3, 4]], float)
    xp.savetxt(buffer, values, fmt="%.18e")
    buffer.seek(0)
    written_lines = buffer.readlines()
    return written_lines
def test_create_day_archive(self):
    """
    The archive for a day must contain one JSON document per line, built
    from the matching rows of the events database.
    """
    after_day = datetime(2020, 12, 3, tzinfo=timezone.utc)
    self.create_event(timestamp=after_day.timestamp())
    before_day = datetime(2020, 12, 2, tzinfo=timezone.utc) - timedelta(seconds=1)
    self.create_event(timestamp=before_day.timestamp())
    for _ in range(2):
        self.create_event()

    archive = BytesIO()
    create_day_archive(self.conn, archive, date(2020, 12, 2))
    archive.seek(0)
    archived_lines = archive.readlines()

    self.assertEqual(len(archived_lines), 2)
    # The two archived rows are expected to carry ids 3 and 4.
    for event_id, raw_line in enumerate(archived_lines, 3):
        expected_event = {
            "id": event_id,
            "sender_id": "27820001001",
            "type_name": "action",
            "timestamp": 1606917902,
            "intent_name": None,
            "action_name": "action_session_start",
            "data": '{"event": "session_started"}',
        }
        self.assertEqual(json.loads(raw_line), expected_event)
def test_simple_bytesio(self):
    """Write a single-commit patch into a BytesIO and check its lines."""
    patch_file = BytesIO()

    commit = Commit()
    commit.committer = commit.author = b"Jelmer <*****@*****.**>"
    commit.commit_time = commit.author_time = 1271350201
    commit.commit_timezone = commit.author_timezone = 0
    commit.message = b"This is the first line\nAnd this is the second line.\n"
    commit.tree = Tree().id

    write_commit_patch(
        patch_file, commit, b"CONTENTS", (1, 1), version="custom")
    patch_file.seek(0)
    patch_lines = patch_file.readlines()

    # Header: "From <sha>", author and date.
    self.assertTrue(patch_lines[0].startswith(
        b"From 0b0d34d1b5b596c928adc9a727a4b9e03d025298"))
    self.assertEqual(patch_lines[1], b"From: Jelmer <*****@*****.**>\n")
    self.assertTrue(patch_lines[2].startswith(b"Date: "))

    expected_message = [
        b"Subject: [PATCH 1/1] This is the first line\n",
        b"And this is the second line.\n",
        b"\n",
        b"\n",
        b"---\n"]
    self.assertEqual(expected_message, patch_lines[3:8])

    expected_trailer = [
        b"CONTENTS-- \n",
        b"custom\n"]
    self.assertEqual(expected_trailer, patch_lines[-2:])

    if len(patch_lines) >= 12:
        # diffstat may not be present
        self.assertEqual(patch_lines[8], b" 0 files changed\n")
def test_simple_bytesio(self):
    """Check the lines `write_commit_patch` emits for one simple commit."""
    author = b"Jelmer <*****@*****.**>"
    timestamp = 1271350201

    c = Commit()
    c.committer = c.author = author
    c.commit_time = c.author_time = timestamp
    c.commit_timezone = c.author_timezone = 0
    c.message = b"This is the first line\nAnd this is the second line.\n"
    c.tree = Tree().id

    out = BytesIO()
    write_commit_patch(out, c, b"CONTENTS", (1, 1), version="custom")
    out.seek(0)
    written = out.readlines()

    self.assertTrue(written[0].startswith(b"From 0b0d34d1b5b596c928adc9a727a4b9e03d025298"))
    self.assertEqual(written[1], b"From: Jelmer <*****@*****.**>\n")
    self.assertTrue(written[2].startswith(b"Date: "))

    subject_and_separator = [
        b"Subject: [PATCH 1/1] This is the first line\n",
        b"And this is the second line.\n",
        b"\n",
        b"\n",
        b"---\n",
    ]
    self.assertEqual(subject_and_separator, written[3:8])
    self.assertEqual([b"CONTENTS-- \n", b"custom\n"], written[-2:])

    # diffstat may not be present
    if len(written) >= 12:
        self.assertEqual(written[8], b" 0 files changed\n")
def send_typical_request(self, message):
    """
    Feed `message` to the handler as its request and return the response lines.

    The handler's ``rfile``/``wfile`` are replaced by in-memory streams
    before ``handle_one_request()`` is invoked.
    """
    request_stream = BytesIO(message)
    response_stream = BytesIO()
    self.handler.rfile = request_stream
    self.handler.wfile = response_stream

    self.handler.handle_one_request()

    response_stream.seek(0)
    response_lines = response_stream.readlines()
    return response_lines
def _get_file(self, filepath):
    """
    Return the UTF-8 decoded contents of the file located by ``findstatic``.

    Runs the ``findstatic`` management command for `filepath` with its output
    captured in memory, takes the second output line as the path of the
    file, and reads that file.

    Raises IndexError when the command prints fewer than two lines.
    """
    out = BytesIO()
    call_command('findstatic', filepath, all=False, verbosity=0, stdout=out)
    out.seek(0)
    lines = [line.strip() for line in out.readlines()]
    # NOTE(review): the first output line is presumably a "Found ..." header,
    # which is why the path is read from index 1 -- confirm.
    # The context manager closes the handle; the original leaked it.
    with codecs.open(smart_unicode(lines[1]), "r", "utf-8") as found_file:
        return found_file.read()
def test_iterator(self):
    """Iterating a BZ2File must yield the same lines as the plain text."""
    from bz2 import BZ2File
    from io import BytesIO
    self.create_temp_file()

    bz2f = BZ2File(self.temppath)
    plain = BytesIO(self.TEXT)
    iterated_lines = list(iter(bz2f))
    expected_lines = plain.readlines()
    assert iterated_lines == expected_lines
    bz2f.close()
def channels_map(self, bucket_name, filename):
    """
    Download a comma-separated file from S3 and build a lookup from it.

    For every line, the key is the last ``/``-separated component of the
    stripped last column and the value is the first column. Later lines
    overwrite earlier ones that share a key.
    """
    buffer = BytesIO()
    self.s3.download_fileobj(bucket_name, filename, buffer)
    buffer.seek(0)

    mapping = {}
    for raw_line in buffer.readlines():
        columns = raw_line.decode('utf-8').split(",")
        basename = columns[-1].strip().split("/")[-1]
        mapping[basename] = columns[0]
    return mapping
def testReadLine(self):
    # "Test BZ2File.readline()"
    # Successive readline() calls must return the plain-text lines in order.
    self.createTempFile()
    # The context manager closes the file even when an assertion fails.
    with BZ2File(self.filename) as bz2f:
        self.assertRaises(TypeError, bz2f.readline, None)
        sio = BytesIO(self.TEXT)
        for line in sio.readlines():
            self.assertEqual(bz2f.readline(), line)
def testWriteLines(self):
    # "Test BZ2File.writelines()"
    # Lines written with writelines() must decompress back to the text.
    with BZ2File(self.filename, "w") as bz2f:
        self.assertRaises(TypeError, bz2f.writelines)
        text_lines = BytesIO(self.TEXT).readlines()
        bz2f.writelines(text_lines)
    # patch #1535500
    # writelines() on the now-closed file must raise ValueError.
    self.assertRaises(ValueError, bz2f.writelines, ["a"])
    with open(self.filename, 'rb') as f:
        compressed = f.read()
        self.assertEqual(self.decompress(compressed), self.TEXT)
def test_readlines(self):
    """BZ2File.readlines() must reject None and return the plain-text lines."""
    from bz2 import BZ2File
    from io import BytesIO
    self.create_temp_file()

    bz2f = BZ2File(self.temppath)
    raises(TypeError, bz2f.readlines, None)
    plain = BytesIO(self.TEXT)
    decompressed_lines = bz2f.readlines()
    expected_lines = plain.readlines()
    assert decompressed_lines == expected_lines
    bz2f.close()
def test_write_float(self):
    """The writer must emit one comma-separated '%.18e' row per array row."""
    a = np.array([[1, 2], [3, 4]], dtype=float)
    c = BytesIO()
    self.writer.write(c, a, delimiter=',', fmt='%.18e')
    c.seek(0)
    lines = c.readlines()
    # bytes objects have no .format() on Python 3, so the expected rows are
    # formatted as text and then encoded to match the BytesIO content.
    row_template = '{:{fmt}},{:{fmt}}\n'
    expected = [
        row_template.format(1, 2, fmt='.18e').encode('ascii'),
        row_template.format(3, 4, fmt='.18e').encode('ascii'),
    ]
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(lines, expected)
def test_all_files(self):
    """
    findstatic must list every candidate file when run without --first.
    """
    captured = BytesIO()
    call_command('findstatic', 'test/file.txt', verbosity=0, stdout=captured)
    captured.seek(0)

    output_lines = []
    for raw_line in captured.readlines():
        output_lines.append(raw_line.strip())

    # three because there is also the "Found <file> here" line
    self.assertEqual(len(output_lines), 3)
    self.assertIn('project', output_lines[1])
    self.assertIn('apps', output_lines[2])
def test_readline(self):
    """Successive BZ2File.readline() calls must return the plain-text lines."""
    from bz2 import BZ2File
    from io import BytesIO
    self.create_temp_file()

    bz2f = BZ2File(self.temppath)
    raises(TypeError, bz2f.readline, None)
    expected_lines = BytesIO(self.TEXT).readlines()
    for expected in expected_lines:
        assert bz2f.readline() == expected
    bz2f.close()
def test_writelines(self):
    """Lines written with BZ2File.writelines() must decompress to the text."""
    from bz2 import BZ2File
    from io import BytesIO

    bz2f = BZ2File(self.temppath, 'w')
    raises(TypeError, bz2f.writelines)
    plain = BytesIO(self.TEXT)
    text_lines = plain.readlines()
    bz2f.writelines(text_lines)
    bz2f.close()

    f = open(self.temppath, "rb")
    compressed = f.read()
    assert self.decompress(compressed) == self.TEXT
    f.close()
def bytes_io():
    """Demonstrate relative seeking and line reading on a BytesIO buffer."""
    # StringIO can only handle str; binary data needs BytesIO.
    # The sio above cannot seek relative to the current position; once the
    # data is written as bytes, a relative seek works.
    stream = BytesIO()
    payload = abc.encode('utf-8')
    stream.write(payload)
    print(stream.getvalue())
    print(stream.tell())  # 36
    stream.seek(-36, 1)  # move 36 bytes back from the current position
    print(stream.tell())  # 0, back at position 0
    for raw_line in stream.readlines():
        print(raw_line.strip())
def assertRoundTrips(self, xml_string):
    """
    Assert that `xml_string` survives a read/write cycle of serializer_v5.

    Checks that writing the parsed inventory reproduces `xml_string`, that
    the line-based writer agrees with the stream writer, and that re-reading
    the output yields an equal inventory.
    """
    serializer = breezy.bzr.xml5.serializer_v5

    source = BytesIO(xml_string)
    inventory = serializer.read_inventory(source)

    written = BytesIO()
    serializer.write_inventory(inventory, written)
    self.assertEqualDiff(xml_string, written.getvalue())

    expected_lines = serializer.write_inventory_to_lines(inventory)
    written.seek(0)
    self.assertEqual(written.readlines(), expected_lines)

    reread = serializer.read_inventory(BytesIO(written.getvalue()))
    self.assertEqual(inventory, reread)
def deserialize_fileid_map(filetext):
    """Deserialize a file id map.

    Each line of `filetext` has the form ``<path>\\0<file id>``; the path is
    decoded as UTF-8 while the file id is kept as bytes.

    :param filetext: Serialized map as bytes
    :return: Fileid map (path -> fileid)
    :raises ValueError: if a line does not hold exactly one NUL separator
    """
    mapping = {}
    for raw_line in BytesIO(filetext):
        path, file_id = raw_line.rstrip(b"\n").split(b"\0")
        mapping[path.decode('utf-8')] = file_id
    return mapping
class ISAPIInputWrapper:
    """
    File-like, seekable view of the request body of an ISAPI control block.

    On construction the whole body is copied into an in-memory buffer: the
    bytes already available on the control block plus, when ``TotalBytes``
    exceeds ``AvailableBytes``, the remainder fetched with ``ReadClient``.
    """
    # Based on ModPythonInputWrapper in mp_wsgi_handler.py

    def __init__(self, ecb):
        self._in = BytesIO()
        self._ecb = ecb

        if self._ecb.AvailableBytes > 0:
            data = self._ecb.AvailableData
            # Check if more data from client than what is in ecb.AvailableData
            excess = self._ecb.TotalBytes - self._ecb.AvailableBytes
            if excess > 0:
                extra = self._ecb.ReadClient(excess)
                data = data + extra
            self._in.write(data)
        # rewind to start
        self._in.seek(0)

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        # The builtin works on both Python 2 and 3 file objects, whereas a
        # ``.next()`` method does not exist on Python 3's BytesIO.
        return next(self._in)

    def read(self, size=-1):
        """Return up to `size` bytes (everything remaining when negative)."""
        return self._in.read(size)

    def readline(self, size=-1):
        """Return one line, limited to `size` bytes when non-negative."""
        return self._in.readline(size)

    def readlines(self, hint=-1):
        """Return the remaining lines, honouring the `hint` size limit."""
        # The original accepted `hint` but silently ignored it.
        return self._in.readlines(hint)

    def reset(self):
        """Rewind the buffer to its start."""
        # io.BytesIO has no reset() method; seeking to 0 is the equivalent.
        self._in.seek(0)

    def seek(self, *args, **kwargs):
        """Move the read position; arguments are passed to the buffer."""
        self._in.seek(*args, **kwargs)

    def tell(self):
        """Return the current read position."""
        return self._in.tell()

    def __iter__(self):
        """Iterate over the lines remaining from the current position."""
        return iter(self._in.readlines())
def coverage_report(cov, report):
    """
    Outputs Coverage report to screen and html.

    When `report` is true, the text report is captured and condensed: if
    every percentage line ends in 100%, only the first two lines and the
    last percentage line are printed; otherwise every line that does not
    end in 100% is printed. The HTML report is then generated and the
    coverage data saved.

    A misc.CoverageException is logged instead of being propagated.
    """
    try:
        if report:
            log.info("\nCoverage Report (showing uncovered modules):")
            # NOTE(review): capturing stdout in a BytesIO and comparing the
            # captured lines with str suffixes only works where str is
            # bytes (Python 2) -- confirm the targeted interpreter.
            real_stdout = sys.stdout
            fake_stdout = BytesIO()
            sys.stdout = fake_stdout
            try:
                cov.report()
            finally:
                sys.stdout = real_stdout

            fake_stdout.seek(0)
            report_lines = fake_stdout.readlines()

            complete = True
            last_line = None
            for line in report_lines:
                line = line.rstrip()
                if line.endswith('%'):
                    last_line = line
                    if not line.endswith('100%'):
                        complete = False
                        break

            if complete and last_line:
                for line in report_lines[:2]:
                    print(line.rstrip())
                # Call form, consistent with the other prints in this
                # function; the bare print statement was a SyntaxError on
                # Python 3.
                print(last_line)
            else:
                for line in report_lines:
                    line = line.rstrip()
                    if line.endswith('100%'):
                        continue
                    print(line)

        cov.html_report()
        cov.save()  # Save the .coverage file so it can be used if necessary.
    except misc.CoverageException as e:
        log.error("Coverage Exception: %s" % e)
def _get_questions(file: io.BytesIO) -> List[str]: try: questions = [ normalize_question(question.decode(encoding='utf-8')) for question in file.readlines() ] except Exception as e: logging.exception(msg=e) raise UnicodeError('Не удалось прочитать ваш файл.\n' 'Пожалуйста, проверьте корректность вашего ' 'документа, а также убедитесь в том, что он ' 'сохранён в кодировке UTF-8.') else: return questions
def pwn_obfuscate(data):
    """
    Return `data` with its PWN(...) string markers rewritten by `pwn_replace`.

    The result starts with a fixed prelude defining ``decode_string`` and is
    followed by every input line that does not start with ``#``, after the
    substitution has been applied to it.

    :param data: source to process, as bytes.
    :return: the prelude plus the rewritten lines, as bytes.
    """
    reader = BytesIO(data)
    # Prelude emitted ahead of the rewritten lines.
    # NOTE(review): this literal looks like Python source that has lost its
    # line breaks and indentation -- confirm against the original file.
    decoded = b""" def decode_string(string): decoded = [chr(ord(b) ^ 0x04) for b in string] return "".join(decoded) """
    for line in reader.readlines():
        # Lines starting with "#" are dropped from the output.
        if not line.startswith(b"#"):
            # Matches PWN followed by a quoted string; the closing quote must
            # be the same as the opening one and not preceded by a backslash.
            decoded += re.sub(rb"""PWN("|')(.*)(?<!\\)\1""", pwn_replace, line)
    return decoded
def external_udiff_lines(old, new, use_stringio=False):
    """
    Run the external diff on `old` and `new` and return its output lines.

    :param old: old content, passed to ``diff.external_diff``.
    :param new: new content, passed to ``diff.external_diff``.
    :param use_stringio: collect the output in a BytesIO instead of a
        temporary file.
    :return: the list of lines of the unified diff output.
    :raises tests.TestSkipped: when ``diff.external_diff`` raises
        ``errors.NoDiff``.
    """
    if use_stringio:
        # BytesIO has no fileno, so it tests a different codepath
        output = BytesIO()
    else:
        output = tempfile.TemporaryFile()
    try:
        try:
            diff.external_diff('old', old, 'new', new, output,
                               diff_opts=['-u'])
        except errors.NoDiff:
            raise tests.TestSkipped('external "diff" not present to test')
        output.seek(0, 0)
        return output.readlines()
    finally:
        # Close the output on every path; the original leaked it when the
        # diff raised or the test was skipped.
        output.close()
def add_file_edge_hashes(self, tree, file_ids):
    """Update to reflect the hashes for files in the tree.

    :param tree: The tree containing the files.
    :param file_ids: A list of file_ids to perform the updates for.
    """
    desired_files = []
    for wanted_id in file_ids:
        desired_files.append((tree.id2path(wanted_id), wanted_id))

    with ui_factory.nested_progress_bar() as task:
        files_bytes = tree.iter_files_bytes(desired_files)
        for num, (file_id, contents) in enumerate(files_bytes):
            task.update(gettext('Calculating hashes'), num, len(file_ids))
            # Re-split the content chunks into lines through a buffer.
            buffer = BytesIO()
            buffer.writelines(contents)
            buffer.seek(0)
            file_lines = buffer.readlines()
            self.add_edge_hashes(file_lines, file_id)
def test_dump_toggles(self):
    """Dump the mocked toggles and check the enabled users of each one."""
    mocked_toggles, expected_items = self._get_mocked_toggles()

    dumper = ToggleDumper(self.domain_name, [])
    dumper._user_ids_in_domain = Mock(return_value={'user1', 'user2', 'user3'})

    output_stream = BytesIO()
    toggle_docs = [doc.to_json() for doc in mocked_toggles.values()]
    with mock_out_couch(docs=toggle_docs):
        dump_counter = dumper.dump(output_stream)

    self.assertEqual(3, dump_counter['Toggle'])

    output_stream.seek(0)
    dumped = []
    for raw_line in output_stream.readlines():
        dumped.append(json.loads(raw_line.strip()))

    for toggle in dumped:
        expected_users = expected_items[toggle['slug']]
        self.assertItemsEqual(expected_users, toggle['enabled_users'])
def tail(logFile, line, field=(0, 1)):
    """
    Return selected fields from the last `line` JSON records of `logFile`.

    The file is created when it does not exist. Each of the last `line`
    lines is parsed as JSON and indexed with every entry of `field`.

    :param logFile: path of a file holding one JSON document per line.
    :param line: number of trailing lines to read (converted with int()).
    :param field: indexes (or keys) to extract from each parsed record.
    :return: a list with one list of extracted values per line read.
    :raises subprocess.CalledProcessError: when ``tail`` exits non-zero.
    """
    # Opening in append mode creates the file if missing without
    # truncating existing content.
    with open(logFile, 'a'):
        pass

    # Options precede the file operand and "-n" gets its own argument; "--"
    # keeps a path starting with "-" from being read as an option.
    output = subprocess.check_output(
        ['tail', '-n', str(int(line)), '--', logFile])

    data = []
    for raw_line in BytesIO(output).readlines():
        record = json.loads(raw_line.decode())
        data.append([record[i] for i in field])
    return data
class ReturnCodeToSubunit(object):
    """Converts a process return code to a subunit error on the process stdout.

    The ReturnCodeToSubunit object behaves as a readonly stream, supplying
    the read, readline and readlines methods. If the process exits non-zero
    a synthetic test is added to the output, making the error accessible to
    subunit stream consumers. If the process closes its stdout and then does
    not terminate, reading from the ReturnCodeToSubunit stream will hang.

    This class will be deleted at some point, allowing parsing to read from
    the actual fd and benefit from select for aggregating non-subunit
    output.
    """

    def __init__(self, process):
        """Adapt a process to a readable stream.

        :param process: A subprocess.Popen object that is generating subunit.
        """
        self.proc = process
        # Set once the process stdout has been replaced by the synthetic
        # return-code stream.
        self.done = False
        self.source = self.proc.stdout
        # Last element of the most recently returned data; starts as
        # LINEFEED so that no separator is injected before any output.
        self.lastoutput = LINEFEED

    def _append_return_code_as_test(self):
        """Replace the source with a buffer describing a non-zero exit.

        Waits for the process, and when its return code is non-zero writes a
        failing 'process-returncode' status into a fresh BytesIO that becomes
        the new source. Runs at most once; later calls return immediately.
        """
        if self.done is True:
            return
        self.source = BytesIO()
        returncode = self.proc.wait()
        if returncode != 0:
            if self.lastoutput != LINEFEED:
                # Subunit V1 is line orientated, it has to start on a fresh
                # line. V2 needs to start on any fresh utf8 character border
                # - which is not guaranteed in an arbitrary stream endpoint, so
                # injecting a \n gives us such a guarantee.
                self.source.write(_b('\n'))
            stream = subunit.StreamResultToBytes(self.source)
            stream.status(test_id='process-returncode', test_status='fail',
                          file_name='traceback', mime_type='text/plain;charset=utf8',
                          file_bytes=('returncode %d' % returncode).encode('utf8'))
        # Rewind so that readers start at the beginning of the new buffer.
        self.source.seek(0)
        self.done = True

    def read(self, count=-1):
        """Read up to `count` bytes, falling through to the return-code data.

        When the current source yields nothing, the synthetic return-code
        stream is installed and read from instead.
        """
        if count == 0:
            return _b('')
        result = self.source.read(count)
        if result:
            self.lastoutput = result[-1]
            return result
        self._append_return_code_as_test()
        return self.source.read(count)

    def readline(self):
        """Read one line, falling through to the return-code data when empty."""
        result = self.source.readline()
        if result:
            self.lastoutput = result[-1]
            return result
        self._append_return_code_as_test()
        return self.source.readline()

    def readlines(self):
        """Return all remaining lines followed by any return-code lines."""
        result = self.source.readlines()
        if result:
            # Last element of the last line read.
            self.lastoutput = result[-1][-1]
        self._append_return_code_as_test()
        result.extend(self.source.readlines())
        return result