Exemple #1
0
 def _append_file(tmp_path, final_path, mode='a'):
     """
     Append the contents of tmp_path to final_path and remove tmp_path.

     The data is copied in chunks instead of being read in one go, so the
     temporary file does not have to fit in memory.

     Parameters
     ----------
     tmp_path: os.PathLike
         File whose contents are appended. It is removed once the copy has
         finished.
     final_path: os.PathLike
         Destination file. It is converted with ``str`` before being opened.
     mode: str
         Mode used to open `final_path`. When it contains ``'b'`` the
         temporary file is read in binary mode, otherwise in text mode.
     """
     # Imported locally so the function does not rely on a module level import.
     import shutil

     tmp_mode = 'rb' if 'b' in mode else 'r'
     with _open(str(final_path), mode=mode) as final_file, \
             _open(tmp_path, mode=tmp_mode) as tmp_file:
         shutil.copyfileobj(tmp_file, final_file)
     # Only reached when the copy succeeded; on failure the temporary file is
     # left in place.
     os.remove(tmp_path)
Exemple #2
0
 def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
     """Initialize the class for reading a BGZF file.

     The data comes either from ``fileobj`` (a file object opened in binary
     mode) or from ``filename``, which is opened in binary read mode.
     ``mode`` without a "b" means the contents are treated as text,
     otherwise as bytes. ``max_cache`` must be at least 1 and is stored on
     the instance.

     Raises ValueError if ``max_cache`` is below 1, if both ``filename``
     and ``fileobj`` are given, if ``fileobj`` is not in binary mode, or if
     a write/append ``mode`` is requested when opening by filename.
     """
     # TODO - Assuming we can seek, check for 28 bytes EOF empty block
     # and if missing warn about possible truncation (as in samtools)?
     if max_cache < 1:
         raise ValueError("Use max_cache with a minimum of 1")
     # Must open the BGZF file in binary mode, but we may want to
     # treat the contents as either text or binary (unicode or
     # bytes under Python 3)
     if fileobj:
         # Explicit exceptions rather than asserts, so the checks are still
         # enforced when Python runs with -O.
         if filename is not None:
             raise ValueError("Supply either filename or fileobj, not both")
         if "b" not in fileobj.mode.lower():
             raise ValueError("fileobj not opened in binary mode")
         handle = fileobj
     else:
         if "w" in mode.lower() or "a" in mode.lower():
             raise ValueError(
                 "Must use read mode (default), not write or append mode")
         handle = _open(filename, "rb")
     self._text = "b" not in mode.lower()
     if self._text:
         self._newline = "\n"
     else:
         self._newline = b"\n"
     self._handle = handle
     self.max_cache = max_cache
     self._buffers = {}
     self._block_start_offset = None
     self._block_raw_length = None
     # Load the block found at the handle's current position.
     self._load_block(handle.tell())
Exemple #3
0
 def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
     """Initialize the class.

     Output goes to ``fileobj`` when one is given; otherwise ``filename`` is
     opened in binary append mode (if ``mode`` contains "a") or binary write
     mode. A ``mode`` without "b" marks the instance as text mode via
     ``self._text``. ``compresslevel`` is stored on the instance.

     Raises ValueError if both ``filename`` and ``fileobj`` are given, or if
     ``mode`` is neither a write nor an append mode when opening by filename.
     """
     if fileobj:
         # Raise instead of assert so the check survives ``python -O``.
         if filename is not None:
             raise ValueError("Supply either filename or fileobj, not both")
         handle = fileobj
     else:
         if "w" not in mode.lower() and "a" not in mode.lower():
             raise ValueError("Must use write or append mode, not %r" %
                              mode)
         if "a" in mode.lower():
             handle = _open(filename, "ab")
         else:
             handle = _open(filename, "wb")
     self._text = "b" not in mode.lower()
     self._handle = handle
     self._buffer = b""
     self.compresslevel = compresslevel
Exemple #4
0
    def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
        r"""Set up a reader over a BGZF file.

        Most callers should go through the top level ``bgzf.open(...)``
        function, which creates this class for them; constructing it directly
        is discouraged.

        One data source is expected: ``filename`` (a string) or ``fileobj``
        (an input file object opened in binary mode). Supplying both raises
        ``ValueError``.

        ``mode`` decides what reads return: strings for text mode ("rt",
        "tr", or the default "r") and bytes for binary mode ("rb" or "br").
        The argument name mirrors the built-in ``open(...)`` and the standard
        library ``gzip.open(...)``. Any other mode raises ``ValueError``.

        Text mode is hard coded to the "latin1" encoding so that multi-byte
        characters are avoided, and "\r" and "\n" are handed back untouched
        (universal new line mode is not implemented). There is no
        ``encoding`` argument, so data in UTF-8 or another incompatible
        encoding has to be read in binary mode and decoded by the caller.

        ``max_cache`` is the maximum number of BGZF blocks kept in memory and
        must be at least 1. A block can be up to 64kb, so the default of 100
        blocks could take up to 6MB of RAM. Larger values matter for random
        access; a small value is fine for a single pass through the file.
        """
        # TODO - Assuming we can seek, check for 28 bytes EOF empty block
        # and if missing warn about possible truncation (as in samtools)?
        if max_cache < 1:
            raise ValueError("Use max_cache with a minimum of 1")
        if filename and fileobj:
            raise ValueError("Supply either filename or fileobj, not both")
        # The BGZF file itself is always read as binary; ``mode`` only
        # governs whether text or bytes are handed back to the caller.
        lowered_mode = mode.lower()
        # Output modes such as w, a, x and + are rejected here.
        if lowered_mode not in ("r", "tr", "rt", "rb", "br"):
            raise ValueError(
                "Must use a read mode like 'r' (default), 'rt', or 'rb' for binary"
            )
        if not fileobj:
            handle = _open(filename, "rb")
        elif "b" in fileobj.mode.lower():
            handle = fileobj
        else:
            raise ValueError("fileobj not opened in binary mode")
        text_mode = "b" not in lowered_mode
        self._text = text_mode
        self._newline = "\n" if text_mode else b"\n"
        self._handle = handle
        self.max_cache = max_cache
        self._buffers = {}
        self._block_start_offset = None
        self._block_raw_length = None
        # Load the block found at the handle's current position.
        self._load_block(handle.tell())
Exemple #5
0
    def open(self, filename, mode='r', *args, **kwargs):
        """
        Open `filename`, redirecting writes to a temporary file.

        A file that was opened through this method before is reopened from
        its temporary file, whatever the mode. Otherwise any mode containing
        ``'+'``, ``'a'`` or ``'w'`` yields a handle to a new temporary file,
        while a mode containing ``'r'`` opens `filename` itself.

        Once :meth:`~vermouth.file_writer.DeferredFileWriter.write` is called
        the changes written to all files opened this way are propagated to
        their final destination.

        Parameters
        ----------
        filename: os.PathLike
            The final name of the file to be opened.
        mode: str
            The mode in which the file is to be opened.
        *args: collections.abc.Iterable
            Forwarded to the call that opens the file.
        **kwargs: dict
            Forwarded to the call that opens the file.

        Returns
        -------
        io.IOBase
            An opened file

        Raises
        ------
        KeyError
            If `mode` contains none of ``'+'``, ``'a'``, ``'w'`` or ``'r'``.
        """
        requested = pathlib.Path(filename)
        # Only the parent directory is resolved: the file itself may not exist
        # yet, and an absolute path keeps working if the current working
        # directory changes before the deferred write happens.
        target = requested.parent.resolve() / requested.name

        # Reuse the temporary file if this destination was opened earlier.
        # Paths are compared by value since the files need not exist yet.
        for known_tmp, known_final, _ in self.open_files:
            if known_final == target:
                return _open(known_tmp, mode, *args, **kwargs)

        if any(flag in mode for flag in ('+', 'a', 'w')):
            # Writing or appending goes through a temporary file.
            return self._open_tmp_file(target, *args, mode=mode, **kwargs)
        if 'r' in mode:
            # Plain reading needs no special treatment.
            return _open(filename, mode, *args, **kwargs)
        raise KeyError('Unknown file mode.')
Exemple #6
0
def open(fn, *args, **kwargs):

    '''

    Open the file ``fn`` in the current output directory.

    The path to open is obtained from ``filename(fn)``; all remaining
    positional and keyword arguments are the same as for open() and are
    forwarded unchanged.

    '''

    resolved_path = filename(fn)
    return _open(resolved_path, *args, **kwargs)
Exemple #7
0
 def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
     """Initialize the class.

     Output is written to ``fileobj`` (which must be in binary mode) when
     one is supplied; otherwise ``filename`` is opened in binary append mode
     if ``mode`` contains "a", or binary write mode if not. A ``mode``
     without "b" marks the instance as text mode via ``self._text``.

     Raises ValueError if both ``filename`` and ``fileobj`` are supplied, if
     ``fileobj`` is not in binary mode, or if ``mode`` is neither a write
     nor an append mode when opening by filename.
     """
     if filename and fileobj:
         raise ValueError("Supply either filename or fileobj, not both")
     if not fileobj:
         lowered = mode.lower()
         appending = "a" in lowered
         if not (appending or "w" in lowered):
             raise ValueError(
                 f"Must use write or append mode, not {mode!r}")
         handle = _open(filename, "ab" if appending else "wb")
     elif "b" in fileobj.mode.lower():
         handle = fileobj
     else:
         raise ValueError("fileobj not opened in binary mode")
     self._text = "b" not in mode.lower()
     self._handle = handle
     self._buffer = b""
     self.compresslevel = compresslevel
Exemple #8
0
 def decompress(self, outName):
     """
     Generic decompress function. Will decompress the entire file to
     outName.

     Every value of ``self.blkDict`` is passed to ``self.read_block`` in the
     dictionary's iteration order, and the returned data is written to the
     output file, which is opened in binary write mode.

     :param outName: path of the file the decompressed data is written to
     :type outName: str
     """
     # The context manager closes (and thereby flushes) the output file even
     # if reading a block fails part-way through, so no per-block flush is
     # needed.
     with _open(outName, 'wb') as writeOut:
         for blk in self.blkDict.values():
             writeOut.write(self.read_block(blk=blk))
Exemple #9
0
 def decompress(self, outName):
     """
     Generic decompress function. Will decompress the entire file to
     outName.

     Every value of ``self.blkDict`` is passed to ``self.read_block`` in the
     dictionary's iteration order, and the returned data is written to the
     output file, which is opened in binary write mode.

     :param outName: path of the file the decompressed data is written to
     :type outName: str
     """
     # The context manager closes (and thereby flushes) the output file even
     # if reading a block fails part-way through, so no per-block flush is
     # needed.
     with _open(outName, 'wb') as writeOut:
         for blk in self.blkDict.values():
             writeOut.write(self.read_block(blk=blk))
Exemple #10
0
def open(path, writable=False, encoding='utf-8', append=False, delete_on_close=False):
    """Open ``path`` through ``_CreateFileW`` and return a Python file object.

    The access, sharing and creation values handed to ``_CreateFileW`` are
    chosen from ``append`` and ``writable``; ``append`` takes precedence when
    both are true. The returned handle is converted to a file descriptor with
    ``msvcrt.open_osfhandle`` and wrapped with ``_open``.

    :param path: path of the file to open
    :param writable: open with mode ``'w+'`` (ignored when ``append`` is true)
    :param encoding: encoding passed to ``_open``; ``None`` selects binary
        mode by adding ``'b'`` to the mode string
    :param append: open with mode ``'a'``
    :param delete_on_close: request deletion of the file when it is closed
    :returns: the opened file object, with ``path`` stored as ``f.path``
    :raises IOError: if ``_CreateFileW`` returns ``0xFFFFFFFF``
    """
    attrib = 0x00000080 # FILE_ATTRIBUTES_NORMAL

    if append:
        mode = 'a'
        flags = os.O_APPEND
        access = 0x40000000 # GENERIC_WRITE
        # NOTE(review): CREATE_NEW fails when the file already exists, which
        # looks odd for an append mode - confirm this is intended.
        creation = 1 # CREATE_NEW
        share = 1 # FILE_SHARE_READ
    elif writable:
        mode = 'w+'
        flags = os.O_RDWR
        # NOTE(review): mode 'w+' and O_RDWR imply reading too, but only
        # GENERIC_WRITE access is requested - confirm.
        access = 0x40000000 # GENERIC_WRITE
        creation = 2 # CREATE_ALWAYS
        share = 1 # FILE_SHARE_READ
    else:
        mode = 'r'
        flags = os.O_RDONLY
        access = 0x80000000 # GENERIC_READ
        creation = 3 # OPEN_EXISTING
        share = 3 # FILE_SHARE_READ|WRITE

    if encoding is None:
        mode += 'b'
    else:
        flags |= os.O_TEXT

    if delete_on_close:
        flags |= os.O_TEMPORARY
        attrib |= 0x04000000 # FILE_FLAG_DELETE_ON_CLOSE
        share |= 4 # FILE_SHARE_DELETE

    # Pass the computed ``attrib`` rather than a fixed FILE_ATTRIBUTE_NORMAL,
    # otherwise FILE_FLAG_DELETE_ON_CLOSE never reaches the call.
    handle = _CreateFileW(path, access, share, None, creation, attrib, None)
    if handle == 0xFFFFFFFF:
        raise IOError("Unable to open file: 0x{:08X}".format(ctypes.GetLastError()))

    fd = msvcrt.open_osfhandle(handle, flags & 0xFFFFFFFF)
    f = _open(fd, mode, encoding=encoding)
    f.path = path
    return f
Exemple #11
0
 def __init__(self, name, fileObj=None, seekable=True):
     """
     Set up the reader from an open file object or from a file name.

     :param name: file name; stored as ``self.name`` and opened in binary
         mode when ``fileObj`` is not supplied
     :param fileObj: already opened file object to read from (optional)
     :param seekable: when true, ``self.load_blocks()`` is called after the
         header has been parsed
     :raises IOError: if the first four header bytes do not match the
         expected magic number, or if ``load_blocks`` fails
     """
     self.name = name
     # Open by name in binary mode when no file object was supplied: the
     # header is compared against bytes below.
     self.fileObj = fileObj if fileObj else _open(name, 'rb')
     self.compEnd = self.tell_end()
     # Read through self.fileObj; the ``fileObj`` argument is None when the
     # file was opened by name.
     self.header = self.fileObj.read(7)
     # The first four header bytes must match the expected magic number.
     if binascii.hexlify(self.header[:4]) != b'04224d18':
         raise IOError('Unexpected magic number in file header.')
     self.dCtx = lz4f.createDecompContext()
     self.fileInfo = lz4f.getFrameInfo(self.header, self.dCtx)
     self.blkSizeID = self.fileInfo.get('blkSize')
     if seekable:
         try:
             self.load_blocks()
         except Exception:
             # Narrower than a bare except, so KeyboardInterrupt and
             # SystemExit are no longer turned into IOError.
             print('Unable to load blockDict. Possibly not a lz4 file.')
             raise IOError('Unable to load blockDict.')
Exemple #12
0
 def __init__(self, name, fileObj=None, seekable=True):
     """
     Set up the reader from an open file object or from a file name.

     :param name: file name; stored as ``self.name`` and opened in binary
         mode when ``fileObj`` is not supplied
     :param fileObj: already opened file object to read from (optional)
     :param seekable: when true, ``self.load_blocks()`` is called after the
         header has been parsed
     :raises IOError: if the first four header bytes do not match the
         expected magic number, or if ``load_blocks`` fails
     """
     self.name = name
     # Open by name in binary mode when no file object was supplied: the
     # header is compared against bytes below.
     self.fileObj = fileObj if fileObj else _open(name, 'rb')
     self.compEnd = self.tell_end()
     # Read through self.fileObj; the ``fileObj`` argument is None when the
     # file was opened by name.
     self.header = self.fileObj.read(7)
     # The first four header bytes must match the expected magic number.
     if binascii.hexlify(self.header[:4]) != b'04224d18':
         raise IOError('Unexpected magic number in file header.')
     self.dCtx = lz4f.createDecompContext()
     self.fileInfo = lz4f.getFrameInfo(self.header, self.dCtx)
     self.blkSizeID = self.fileInfo.get('blkSize')
     if seekable:
         try:
             self.load_blocks()
         except Exception:
             # Narrower than a bare except, so KeyboardInterrupt and
             # SystemExit are no longer turned into IOError.
             print('Unable to load blockDict. Possibly not a lz4 file.')
             raise IOError('Unable to load blockDict.')
Exemple #13
0
 def open(cls, name=None, fileObj=None, seekable=True):
     """
     Build an instance from a file name or an already opened file object.

     When ``fileObj`` is not supplied, ``name`` is opened in binary read
     mode. The result of ``cls(name, fileObj, seekable)`` is returned.

     :param name: file name to open when ``fileObj`` is not given
     :param fileObj: already opened file object (optional)
     :param seekable: passed through to ``cls``
     :raises ValueError: if neither ``name`` nor ``fileObj`` is given
     """
     if not name and not fileObj:
         # Fail here with a clear message rather than continuing into
         # ``_open`` with nothing to open.
         raise ValueError('Nothing to open!')
     if not fileObj:
         fileObj = _open(name, 'rb')
     return cls(name, fileObj, seekable)
Exemple #14
0
}

DB_FILE = 'data.sqlite'
DATA_DIR = 'data'
PAGE_SIZE = 20
# Both flags are on unless the environment variable is exactly 'false' or '0'
# (an unset variable yields None, which also leaves the flag on).
UPDATE_MODE = os.getenv('update') not in ['false', '0']
HEADLESS = os.getenv('headless') not in ['false', '0']
# Raw strings keep the regex escapes intact without relying on invalid string
# escape sequences such as '\[' or '\*'; the compiled patterns are unchanged.
TITLE_REGEX = re.compile(r'\[.*?\]《.*?》')
FILENAME_BAD_CHAR_REGEX = re.compile(r'\\|/|:|\*|\?|"|<|>|\|')

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s: [%(levelname)s] %(message)s')

# fixups
from builtins import open as _open


def open(fp, rw):
    """Open ``fp`` with mode ``rw``, always passing ``encoding='utf8'``."""
    return _open(fp, rw, encoding='utf8')


# Data storage
class SQLite3:
    """Small wrapper around a single ``sqlite3`` connection."""

    def __init__(self, dbfile=os.path.join(BASE_PATH, DB_FILE)):
        """Connect to the database file ``dbfile``."""
        self.conn = sqlite3.connect(dbfile)

    def query(self, sql, params=None):
        """Execute ``sql`` and return all fetched rows.

        ``params`` is handed to ``cursor.execute`` only when it is truthy.
        The connection is committed after the rows have been fetched.
        """
        cur = self.conn.cursor()
        try:
            if params:
                cur.execute(sql, params)
            else:
                cur.execute(sql)
            data = cur.fetchall()
        finally:
            # Close the cursor even when executing or fetching fails.
            cur.close()
        self.conn.commit()
        return data
Exemple #15
0
 def open(cls, name=None, fileObj=None, seekable=True):
     if not name and not fileObj:
         sys.stderr.write('Nothing to open!')
     if not fileObj:
         fileObj = _open(name, 'rb')
     return cls(name, fileObj, seekable)