def _append_file(tmp_path, final_path, mode='a'):
    """
    Append the contents of tmp_path to final_path and remove tmp_path.

    Parameters
    ----------
    tmp_path: os.PathLike
        The file whose contents are copied. It is deleted after the copy.
    final_path: os.PathLike
        The destination file. It is converted with :func:`str` before being
        opened.
    mode: str
        The mode used to open `final_path`. If it contains ``'b'`` the
        temporary file is read as bytes, otherwise as text.
    """
    import shutil

    tmp_mode = 'rb' if 'b' in mode else 'r'
    with _open(str(final_path), mode=mode) as final_file, \
            _open(tmp_path, mode=tmp_mode) as tmp_file:
        # Copy in chunks so a large temporary file is never held in memory
        # in one piece.
        shutil.copyfileobj(tmp_file, final_file)
    # Only remove the temporary file once both handles are closed.
    os.remove(tmp_path)
def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
    """Initialize the class.

    Either ``filename`` or ``fileobj`` (an already open binary handle) is
    used as the data source. ``mode`` selects whether data is treated as
    text (no "b" in the mode) or bytes. ``max_cache`` must be at least 1.

    Raises ValueError if ``max_cache`` is below 1, if both ``filename`` and
    ``fileobj`` are given, if ``fileobj`` does not return bytes, or if a
    write or append mode is requested together with a filename.
    """
    # TODO - Assuming we can seek, check for 28 bytes EOF empty block
    # and if missing warn about possible truncation (as in samtools)?
    if max_cache < 1:
        raise ValueError("Use max_cache with a minimum of 1")
    # Must open the BGZF file in binary mode, but we may want to
    # treat the contents as either text or binary (unicode or
    # bytes under Python 3)
    if fileobj:
        # Explicit exceptions rather than assert statements, which are
        # stripped when Python runs with -O.
        if filename is not None:
            raise ValueError("Supply either filename or fileobj, not both")
        # A zero-length read tells us whether the handle yields bytes
        # without relying on a ``mode`` attribute, which in-memory
        # streams such as io.BytesIO do not have.
        if fileobj.read(0) != b"":
            raise ValueError("fileobj not opened in binary mode")
        handle = fileobj
    else:
        if "w" in mode.lower() or "a" in mode.lower():
            raise ValueError(
                "Must use read mode (default), not write or append mode")
        handle = _open(filename, "rb")
    self._text = "b" not in mode.lower()
    if self._text:
        self._newline = "\n"
    else:
        self._newline = b"\n"
    self._handle = handle
    self.max_cache = max_cache
    self._buffers = {}
    self._block_start_offset = None
    self._block_raw_length = None
    # Load the block found at the handle's current position.
    self._load_block(handle.tell())
def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
    """Initialize the class.

    Either ``filename`` or ``fileobj`` (an already open handle) is used as
    the output. When a filename is given, ``mode`` must contain "w" or "a";
    the file is always opened in binary mode ("wb" or "ab"). A "b" in
    ``mode`` marks the written data as binary rather than text.

    Raises ValueError if both ``filename`` and ``fileobj`` are given, or if
    ``mode`` is neither a write nor an append mode.
    """
    if fileobj:
        # Explicit exception rather than an assert statement, which is
        # stripped when Python runs with -O.
        if filename is not None:
            raise ValueError("Supply either filename or fileobj, not both")
        handle = fileobj
    else:
        if "w" not in mode.lower() and "a" not in mode.lower():
            raise ValueError("Must use write or append mode, not %r" % mode)
        if "a" in mode.lower():
            handle = _open(filename, "ab")
        else:
            handle = _open(filename, "wb")
    self._text = "b" not in mode.lower()
    self._handle = handle
    self._buffer = b""
    self.compresslevel = compresslevel
def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
    r"""Initialize the class for reading a BGZF file.

    You would typically use the top level ``bgzf.open(...)`` function
    which will call this class internally. Direct use is discouraged.

    Either the ``filename`` (string) or ``fileobj`` (input file object in
    binary mode) arguments must be supplied, but not both. Any ``fileobj``
    whose ``read`` method returns bytes is accepted, including in-memory
    streams such as ``io.BytesIO``.

    Argument ``mode`` controls if the data will be returned as strings in
    text mode ("rt", "tr", or default "r"), or bytes binary mode ("rb"
    or "br"). The argument name matches the built-in ``open(...)`` and
    standard library ``gzip.open(...)`` function.

    If text mode is requested, in order to avoid multi-byte characters,
    this is hard coded to use the "latin1" encoding, and "\r" and "\n"
    are passed as is (without implementing universal new line mode). There
    is no ``encoding`` argument.

    If your data is in UTF-8 or any other incompatible encoding, you must
    use binary mode, and decode the appropriate fragments yourself.

    Argument ``max_cache`` controls the maximum number of BGZF blocks to
    cache in memory. Each can be up to 64kb thus the default of 100 blocks
    could take up to 6MB of RAM. This is important for efficient random
    access, a small value is fine for reading the file in one pass.

    Raises ValueError if ``max_cache`` is below 1, if both ``filename`` and
    ``fileobj`` are supplied, if ``mode`` is not a read mode, or if
    ``fileobj`` does not return bytes.
    """
    # TODO - Assuming we can seek, check for 28 bytes EOF empty block
    # and if missing warn about possible truncation (as in samtools)?
    if max_cache < 1:
        raise ValueError("Use max_cache with a minimum of 1")
    # Must open the BGZF file in binary mode, but we may want to
    # treat the contents as either text or binary (unicode or
    # bytes under Python 3)
    if filename and fileobj:
        raise ValueError("Supply either filename or fileobj, not both")
    # Want to reject output modes like w, a, x, +
    if mode.lower() not in ("r", "tr", "rt", "rb", "br"):
        raise ValueError(
            "Must use a read mode like 'r' (default), 'rt', or 'rb' for binary"
        )
    if fileobj:
        # A zero-length read tells us whether the handle yields bytes
        # without relying on a ``mode`` attribute, which in-memory
        # streams such as io.BytesIO do not have.
        if fileobj.read(0) != b"":
            raise ValueError("fileobj not opened in binary mode")
        handle = fileobj
    else:
        handle = _open(filename, "rb")
    self._text = "b" not in mode.lower()
    if self._text:
        self._newline = "\n"
    else:
        self._newline = b"\n"
    self._handle = handle
    self.max_cache = max_cache
    self._buffers = {}
    self._block_start_offset = None
    self._block_raw_length = None
    # Load the block found at the handle's current position.
    self._load_block(handle.tell())
def open(self, filename, mode='r', *args, **kwargs):
    """
    Open `filename`, redirecting writes to a temporary file.

    For modes containing ``'w'``, ``'a'`` or ``'+'`` a handle to a temporary
    file is returned. For a plain read mode the named file itself is opened.
    If the same destination was opened through this object before, its
    temporary file is reopened with the requested mode instead. Once
    :meth:`~vermouth.file_writer.DeferredFileWriter.write` is called the
    changes written to all files opened this way are propagated to their
    final destination.

    Parameters
    ----------
    filename: os.PathLike
        The final name of the file to be opened.
    mode: str
        The mode in which the file is to be opened.
    *args: collections.abc.Iterable
        Passed to :func:`os.fdopen`.
    **kwargs: dict
        Passed to :func:`os.fdopen`.

    Returns
    -------
    io.IOBase
        An opened file

    Raises
    ------
    KeyError
        If `mode` is neither a read, write, append nor update mode.
    """
    requested = pathlib.Path(filename)
    # Only the parent directory is resolved: this makes the path absolute
    # (the working directory may change before writing) without needing
    # the file itself to exist, which path.resolve() requires on py35.
    path = requested.parent.resolve() / requested.name

    # A destination that was opened before maps onto its temporary file.
    # Paths are compared directly rather than with Path.samefile, since the
    # files don't have to exist yet.
    for known_tmp, known_final, _ in self.open_files:
        if known_final == path:
            return _open(known_tmp, mode, *args, **kwargs)

    if any(flag in mode for flag in ('+', 'a', 'w')):
        # Writing, appending or updating goes through a temporary file.
        return self._open_tmp_file(path, *args, mode=mode, **kwargs)
    if 'r' not in mode:
        raise KeyError('Unknown file mode.')
    # Plain reading needs no special treatment.
    return _open(filename, mode, *args, **kwargs)
def open(fn, *args, **kwargs):
    """
    Open a file in the current output directory.

    ``fn`` is first mapped through ``filename``; every other positional and
    keyword argument is handed on unchanged, so they are the same as for
    open().
    """
    target = filename(fn)
    return _open(target, *args, **kwargs)
def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
    """Initialize the class.

    Either ``filename`` or ``fileobj`` (an already open binary handle) is
    used as the output, but not both. When a filename is given, ``mode``
    must contain "w" or "a"; the file is always opened in binary mode
    ("wb" or "ab"). A "b" in ``mode`` marks the written data as binary
    rather than text.

    Raises ValueError if both ``filename`` and ``fileobj`` are supplied, if
    ``fileobj`` reports a text mode, or if ``mode`` is neither a write nor
    an append mode.
    """
    if filename and fileobj:
        raise ValueError("Supply either filename or fileobj, not both")
    if fileobj:
        # Not every file-like object has a string ``mode`` attribute (for
        # example io.BytesIO has none), so only reject handles that
        # explicitly report a non-binary mode.
        handle_mode = getattr(fileobj, "mode", None)
        if isinstance(handle_mode, str) and "b" not in handle_mode.lower():
            raise ValueError("fileobj not opened in binary mode")
        handle = fileobj
    else:
        if "w" not in mode.lower() and "a" not in mode.lower():
            raise ValueError(
                f"Must use write or append mode, not {mode!r}")
        if "a" in mode.lower():
            handle = _open(filename, "ab")
        else:
            handle = _open(filename, "wb")
    self._text = "b" not in mode.lower()
    self._handle = handle
    self._buffer = b""
    self.compresslevel = compresslevel
def decompress(self, outName):
    """
    Generic decompress function. Will decompress the entire file to outName.

    Every entry in ``self.blkDict`` is passed to ``self.read_block`` and the
    result is written to the output file in iteration order.

    :param outName: path of the file to write; opened in binary write mode.
    :type outName: string
    """
    # The context manager closes (and thereby flushes) the output even when
    # reading or writing a block fails.
    with _open(outName, 'wb') as writeOut:
        for blk in self.blkDict.values():
            writeOut.write(self.read_block(blk=blk))
def open(path, writable=False, encoding='utf-8', append=False, delete_on_close=False):
    """Open ``path`` via ``_CreateFileW`` and wrap it in a Python file object.

    The handle returned by ``_CreateFileW`` is converted to a file
    descriptor with ``msvcrt.open_osfhandle`` and then opened with
    ``_open``. The returned object gets a ``path`` attribute holding
    ``path``.

    :param path: the file to open.
    :param writable: open with mode ``'w+'`` (CREATE_ALWAYS). Ignored when
        ``append`` is true.
    :param encoding: text encoding passed to ``_open``; ``None`` selects
        binary mode.
    :param append: open with mode ``'a'`` (CREATE_NEW).
    :param delete_on_close: request FILE_FLAG_DELETE_ON_CLOSE and
        FILE_SHARE_DELETE for the handle.
    :raises IOError: if ``_CreateFileW`` returns ``0xFFFFFFFF``.
    """
    attrib = 0x00000080  # FILE_ATTRIBUTES_NORMAL
    if append:
        mode = 'a'
        flags = os.O_APPEND
        access = 0x40000000  # GENERIC_WRITE
        creation = 1  # CREATE_NEW
        share = 1  # FILE_SHARE_READ
    elif writable:
        mode = 'w+'
        flags = os.O_RDWR
        access = 0x40000000  # GENERIC_WRITE
        creation = 2  # CREATE_ALWAYS
        share = 1  # FILE_SHARE_READ
    else:
        mode = 'r'
        flags = os.O_RDONLY
        access = 0x80000000  # GENERIC_READ
        creation = 3  # OPEN_EXISTING
        share = 3  # FILE_SHARE_READ|WRITE
    if encoding is None:
        mode += 'b'
    else:
        flags |= os.O_TEXT
    if delete_on_close:
        flags |= os.O_TEMPORARY
        attrib |= 0x04000000  # FILE_FLAG_DELETE_ON_CLOSE
        share |= 4  # FILE_SHARE_DELETE

    # Pass the accumulated attributes; a hard-coded FILE_ATTRIBUTES_NORMAL
    # here would silently drop the delete-on-close flag set above.
    handle = _CreateFileW(path, access, share, None, creation, attrib, None)
    if handle == 0xFFFFFFFF:
        raise IOError("Unable to open file: 0x{:08X}".format(ctypes.GetLastError()))
    fd = msvcrt.open_osfhandle(handle, flags & 0xFFFFFFFF)
    f = _open(fd, mode, encoding=encoding)
    f.path = path
    return f
def __init__(self, name, fileObj=None, seekable=True):
    """
    Set up reading of an lz4 frame from ``fileObj`` or from the file ``name``.

    The first 7 bytes are read as the header. If its first four bytes match
    the expected magic number, a decompression context is created and the
    frame info is stored. When ``seekable`` is true, ``self.load_blocks()``
    is called as well.

    :param name: file name; stored as ``self.name`` and opened in binary
        read mode when ``fileObj`` is not given.
    :param fileObj: optional already open file object to read from.
    :param seekable: whether to call ``self.load_blocks()``.
    :raises IOError: if the magic number does not match, or if loading the
        blocks fails.
    """
    self.name = name
    # Binary mode is required: the header is compared against bytes.
    self.fileObj = fileObj if fileObj else _open(name, 'rb')
    self.compEnd = self.tell_end()
    # Read through self.fileObj so this also works when only ``name`` was
    # given and the ``fileObj`` argument is None.
    self.header = self.fileObj.read(7)
    if binascii.hexlify(self.header[:4]) != b'04224d18':
        raise IOError
    self.dCtx = lz4f.createDecompContext()
    self.fileInfo = lz4f.getFrameInfo(self.header, self.dCtx)
    self.blkSizeID = self.fileInfo.get('blkSize')
    if seekable:
        try:
            self.load_blocks()
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not converted into IOError.
            print('Unable to load blockDict. Possibly not a lz4 file.')
            raise IOError
def open(cls, name=None, fileObj=None, seekable=True):
    """
    Build an instance of ``cls`` from a file name or an open file object.

    :param name: file to open in binary read mode when ``fileObj`` is not
        given.
    :param fileObj: optional already open file object; used as is.
    :param seekable: passed through to ``cls``.
    :return: the result of ``cls(name, fileObj, seekable)``.
    :raises IOError: if neither ``name`` nor ``fileObj`` is supplied.
    """
    if not name and not fileObj:
        sys.stderr.write('Nothing to open!')
        # Fail here with a clear error instead of falling through and
        # trying to open an empty name.
        raise IOError('Nothing to open!')
    if not fileObj:
        fileObj = _open(name, 'rb')
    return cls(name, fileObj, seekable)
} DB_FILE = 'data.sqlite' DATA_DIR = 'data' PAGE_SIZE = 20 UPDATE_MODE = os.getenv('update') not in ['false', '0'] HEADLESS = os.getenv('headless') not in ['false', '0'] TITLE_REGEX = re.compile('\[.*?\]《.*?》') FILENAME_BAD_CHAR_REGEX = re.compile('\\\\|/|:|\*|\?|"|<|>|\|') logging.basicConfig(level=logging.INFO, format='%(asctime)s: [%(levelname)s] %(message)s') # fixups from builtins import open as _open open = lambda fp, rw: _open(fp, rw, encoding='utf8') # Data storage class SQLite3: def __init__(self, dbfile=os.path.join(BASE_PATH, DB_FILE)): self.conn = sqlite3.connect(dbfile) def query(self, sql, params=None): cur = self.conn.cursor() if params: cur.execute(sql, params) else: cur.execute(sql) data = cur.fetchall() cur.close() self.conn.commit() return data