def common_iprefix(*strings):
    """Return the length of the common prefix of strings"""
    # 'eq' is assumed to be a helper from this codebase that compares all
    # of its arguments for equality.
    i = -1
    for i in xrange(0, min(len(s) for s in strings)):
        if not eq(*(s[i] for s in strings)):
            return i
    # the whole shortest string matched: i is the last compared index
    return i + 1

def common_isuffix(*strings):
    """Return the length of the common suffix of strings"""
    i = -1
    for i in xrange(0, min(len(s) for s in strings)):
        # compare the i-th character from the end of every string
        if not eq(*(s[len(s) - i - 1] for s in strings)):
            return i
    return i + 1
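
# Usage sketch for the two helpers above, assuming 'eq' (defined elsewhere
# in this codebase) returns True when all of its arguments are equal:
# >>> common_iprefix('interstellar', 'interstate', 'internal')
# 5
# >>> common_isuffix('floating', 'swimming', 'doing')
# 3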

def write_and_check(path, size, filler, checksum, padding='\0'):
    """Write *size* bytes taken from *filler* (padded if needed) to *path*,
    then re-read the file and verify it with *checksum*."""
    original = checksum()
    final = checksum()
    if len(padding) != 1:
        raise ValueError(lang.WRONG_ONE_CHAR_STRING % padding)
    with fd.File.open(path, fd.FO_WRNEW) as fp:
        written = 0
        for data in filler:
            data = str(data)
            if written + len(data) > size:
                data = data[:size - written]  # truncate to fit *size*
            fp.write(data)
            original.update(data)
            # count what was actually written, not the pre-truncation length
            written += len(data)
            if written >= size:
                break
        if written < size:
            # pad up to *size* if filler ran short
            for _ in xrange(0, size - written):
                fp.write(padding)
                original.update(padding)
    with fd.File.open(path, fd.FO_READEX) as fp:
        if len(fp) != size:
            return 0
        read = 0
        while read < size:
            chunk = fp.read(BUFSIZE)
            if not chunk:
                return 0
            final.update(chunk)
            read += len(chunk)
        if fp.read(1):  # trailing data beyond *size*
            return 0
    return original.digest() == final.digest()
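
# write_and_check() expects *checksum* to be a factory of objects with an
# update()/digest() interface (hashlib qualifies) and *filler* to be an
# iterable of strings; a hypothetical session (path chosen for the example):
# >>> import hashlib
# >>> write_and_check('/tmp/out.bin', 1024, iter(['spam', 'egg']), hashlib.md5)
# True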

def chunks(self, size, start=0, stop=None):
    """Divide the FD's content into chunks of length *size*, starting
    from *start* and stopping at *stop*; if *stop* is None, stop at the
    end of the FD's content."""
    size = int(size)
    stop = None if stop is None else int(stop)
    # slice().indices() clamps start/stop to len(self) and steps by *size*
    for offset in xrange(*slice(int(start), stop, size).indices(len(self))):
        yield self.pread(size, offset)
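
# The slice(...).indices(...) trick above clamps *start*/*stop* to the
# object's length and steps by *size*; the same arithmetic on a plain
# string (hypothetical helper, pread is emulated with slicing):
def _string_chunks(s, size, start=0, stop=None):
    for offset in xrange(*slice(start, stop, size).indices(len(s))):
        yield s[offset:offset + size]

# >>> list(_string_chunks('abcdefgh', 3))
# ['abc', 'def', 'gh']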

def clear_all_opened_fds(exclude=None):
    """Forcibly close all open file descriptors in the current process.

    Tries to close every descriptor from 0 up to the process's maximum,
    whether it is actually open or not.
    """
    maxfd = FD.get_maximum_value()
    for fd in xrange(0, maxfd):
        if exclude is None or fd not in exclude:
            unistd.close(fd)
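
# Typical call, keeping the standard streams open (hypothetical session):
# >>> clear_all_opened_fds(exclude=(0, 1, 2))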

def ioc(seq, shift=1):
    """Return the index of coincidence of *seq* with itself rotated by
    *shift*: the fraction of positions whose symbols match."""
    match = 0
    seq_len = len(seq)
    for i in xrange(0, seq_len):
        j = (i + shift) % seq_len
        if seq[i] == seq[j]:
            match += 1
    return float(match) / float(seq_len)
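
# For a periodic sequence the coincidence rate peaks when *shift* is a
# multiple of the period, which is how ioc() can expose a repeating key:
# >>> ioc('abab', 1)
# 0.0
# >>> ioc('abab', 2)
# 1.0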

def _main():
    path = os.path.abspath(sys.argv[1])
    fno = 0
    with fd.File.open(path, fd.FO_READEX) as txt:
        # split and sort
        prev_end = offset = 0
        for lineno, (start, end) in enumerate(txt.xlines(keep_eol=1, size=BUFSIZE)):
            if end - offset > MAX_MEMORY_SORT:
                if end - prev_end > MAX_MEMORY_SORT:
                    print >> sys.stderr, "[ERROR]"
                    print >> sys.stderr, "Line %d bigger than MAX_MEMORY_SORT limit" % lineno
                    print >> sys.stderr, "Line's length: %d" % (end - prev_end)
                    print >> sys.stderr, "MAX_MEMORY_SORT limit: %d" % MAX_MEMORY_SORT
                    return 1
                # flush the sorted chunk [offset, prev_end) to a split file
                with fd.File.open(os.path.join(TMP_DIR, '%s.srt' % fno), fd.FO_WRITE) as fout:
                    fout.truncate()
                    for line in sort_in_memory(txt, offset, prev_end):
                        fout.write(line)
                fno += 1
                # the current line starts the next chunk
                offset = prev_end
            prev_end = end
        else:
            # no break above, so this always runs after the loop:
            # flush the last, possibly partial, chunk
            with fd.File.open(os.path.join(TMP_DIR, '%s.srt' % fno), fd.FO_WRITE) as fout:
                fout.truncate()
                for line in sort_in_memory(txt, offset, prev_end):
                    fout.write(line)
            fno += 1
    splits = fno
    # merge the sorted split files
    files = [fd.File.open(os.path.join(TMP_DIR, '%s.srt' % fno), fd.FO_READ).lines()
             for fno in xrange(0, splits)]
    lines = [f.next() for f in files]
    while files:
        fno, line = min(enumerate(lines), key=operator.itemgetter(1))
        print line
        try:
            lines[fno] = files[fno].next()
        except StopIteration:
            del lines[fno]
            del files[fno]
    for i in xrange(0, splits):
        os.unlink(os.path.join(TMP_DIR, '%s.srt' % i))
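
# _main() relies on a sort_in_memory(txt, start, stop) helper that is not
# shown here; a minimal sketch of what it could look like, assuming the
# pread(size, offset) interface used elsewhere in this codebase:
def sort_in_memory(txt, start, stop):
    # read the whole byte range at once (it is bounded by MAX_MEMORY_SORT)
    data = txt.pread(stop - start, start)
    # keep line terminators so the merge phase can write lines back verbatim
    return sorted(data.splitlines(True))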

def __getitem__(self, index):
    if isinstance(index, (int, long)):
        if index < 0:
            index += len(self)  # support negative indices
        return self.pread(1, index)
    elif isinstance(index, slice):
        start, stop, step = index.indices(len(self))
        if step == 1:
            return self.pread(stop - start, start)
        else:
            return ''.join(self.pread(1, pos)
                           for pos in xrange(start, stop, step))
    raise TypeError('wrong index type: %s' % type(index))
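
# With __getitem__ an FD can be indexed like a string (hypothetical
# session, *f* being an open fd.File):
# >>> f[0]       # single byte
# >>> f[16:32]   # one contiguous pread of 16 bytes
# >>> f[::2]     # every other byte, one pread per position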

from zlib import adler32

def rk(source, pattern, start=0, stop=None, checksum=adler32):
    """Return a generator that yields all occurrences of pattern in
    source[start:stop] using the Rabin-Karp algorithm.  *checksum* is a
    callable that returns a string's checksum."""
    if stop is None:
        stop = len(source)
    pat_len = len(pattern)
    if start > stop or pat_len > stop - start:
        return iter(())
    pat_checksum = checksum(pattern)
    # the last valid window starts at stop - pat_len; compare checksums
    # first, then confirm candidates with a full comparison
    return (i for i in xrange(start, stop - pat_len + 1)
            if checksum(source[i:i + pat_len]) == pat_checksum
            and source[i:i + pat_len] == pattern)
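
# Usage sketch; note that this version recomputes the checksum of every
# window instead of rolling it forward, so it is quadratic in the worst case:
# >>> list(rk('abracadabra', 'abra'))
# [0, 7]
# >>> list(rk('abracadabra', 'abra', start=1))
# [7]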

def split_newlines(string):
    """Generator of the lines in *string* ending with '\r\n', '\r' or '\n'"""
    line_start = 0
    i = 0
    while i < len(string):
        c = string[i]
        if c in ('\r', '\n'):
            yield string[line_start:i]
            # '\r\n' counts as a single line terminator
            if c == '\r' and i + 1 < len(string) and string[i + 1] == '\n':
                i += 1
            line_start = i + 1
        i += 1
    if line_start < len(string):
        yield string[line_start:]
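
# Usage sketch; a trailing terminator does not yield a final empty line:
# >>> list(split_newlines('one\r\ntwo\nthree\rfour\n'))
# ['one', 'two', 'three', 'four']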

from itertools import islice

def knp(source, pattern, start=0, stop=None):
    """Yield all occurrences (start offsets) of pattern in
    source[start:stop] with the Knuth-Morris-Pratt algorithm."""
    # build the KMP shift table from the pattern
    shifts = [1] * (len(pattern) + 1)
    shift = 1
    for pos in xrange(0, len(pattern)):
        while shift <= pos and pattern[pos] != pattern[pos - shift]:
            shift += shifts[pos - shift]
        shifts[pos + 1] = shift
    # search pattern
    mlen = 0  # length of the current partial match
    plen = len(pattern)
    for sub in islice(source, start, stop):
        while mlen == plen or (mlen >= 0 and pattern[mlen] != sub):
            sl = shifts[mlen]
            start += sl  # *start* tracks where the candidate match begins
            mlen -= sl
        mlen += 1
        if mlen == plen:
            yield start
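
# Matches may overlap; usage sketch:
# >>> list(knp('abcabcabc', 'abcab'))
# [0, 3]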