def shlexer(data=None, filepath=None, wordchars=None, whitespace=None):
    if data is None:
        if pycompat.ispy3:
            data = open(filepath, b'r', encoding=r'latin1')
        else:
            data = open(filepath, b'r')
    else:
        if filepath is not None:
            raise error.ProgrammingError(
                b'shlexer only accepts data or filepath, not both'
            )
        if pycompat.ispy3:
            data = data.decode('latin1')
    l = shlex.shlex(data, infile=filepath, posix=True)
    if whitespace is not None:
        l.whitespace_split = True
        if pycompat.ispy3:
            l.whitespace += whitespace.decode('latin1')
        else:
            l.whitespace += whitespace
    if wordchars is not None:
        if pycompat.ispy3:
            l.wordchars += wordchars.decode('latin1')
        else:
            l.wordchars += wordchars
    if pycompat.ispy3:
        return _shlexpy3proxy(l)
    return l
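# A minimal usage sketch: this mirrors how parsesplicemap further below
# drives shlexer to tokenize one comma-separated splicemap line. The
# helper name is made up for illustration.
def _splitspliceline(line):
    # split on commas while honoring shell-style quoting of the fields
    lex = shlexer(data=line, whitespace=b',')
    return list(lex)  # e.g. b'child,p1,p2' -> [b'child', b'p1', b'p2']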
def _validatekey(self, path, action):
    with open(path, b'rb') as f:
        data = f.read()

    if self._validatedata(data, path):
        return True

    if self._validatecachelog:
        with open(self._validatecachelog, b'ab+') as f:
            f.write(b"corrupt %s during %s\n" % (path, action))

    os.rename(path, path + b".corrupt")
    return False
def do_relink(src, dst, files, ui):
    def relinkfile(src, dst):
        bak = dst + b'.bak'
        os.rename(dst, bak)
        try:
            util.oslink(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    CHUNKLEN = 65536
    relinked = 0
    savedbytes = 0

    progress = ui.makeprogress(
        _(b'relinking'), unit=_(b'files'), total=len(files)
    )
    pos = 0
    for f, sz in files:
        pos += 1
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        # Binary mode, so that read() works correctly, especially on Windows
        sfp = open(source, b'rb')
        dfp = open(tgt, b'rb')
        sin = sfp.read(CHUNKLEN)
        while sin:
            din = dfp.read(CHUNKLEN)
            if sin != din:
                break
            sin = sfp.read(CHUNKLEN)
        sfp.close()
        dfp.close()
        if sin:
            ui.debug(b'not linkable: %s\n' % f)
            continue
        try:
            relinkfile(source, tgt)
            progress.update(pos, item=f)
            relinked += 1
            savedbytes += sz
        except OSError as inst:
            ui.warn(b'%s: %s\n' % (tgt, stringutil.forcebytestr(inst)))
    progress.complete()

    ui.status(
        _(b'relinked %d files (%s reclaimed)\n')
        % (relinked, util.bytecount(savedbytes))
    )
def flush(self):
    """write the state down to the file"""
    if not self.path:
        return
    if self._lastmaxrev == -1:  # write the entire file
        with open(self.path, b'wb') as f:
            f.write(self.HEADER)
            for i in pycompat.xrange(1, len(self._rev2hsh)):
                self._writerev(i, f)
    else:  # append incrementally
        with open(self.path, b'ab') as f:
            for i in pycompat.xrange(
                self._lastmaxrev + 1, len(self._rev2hsh)
            ):
                self._writerev(i, f)
    self._lastmaxrev = self.maxrev
def _asyncsavemetadata(root, nodes):
    '''starts a separate process that fills metadata for the nodes

    This function creates a separate process and doesn't wait for its
    completion. This was done to avoid slowing down pushes.
    '''
    maxnodes = 50
    if len(nodes) > maxnodes:
        return
    nodesargs = []
    for node in nodes:
        nodesargs.append(b'--node')
        nodesargs.append(node)
    with open(os.devnull, b'w+b') as devnull:
        cmdline = [
            util.hgexecutable(),
            b'debugfillinfinitepushmetadata',
            b'-R',
            root,
        ] + nodesargs
        # Process will run in background. We don't care about the return code
        subprocess.Popen(
            pycompat.rapply(procutil.tonativestr, cmdline),
            close_fds=True,
            shell=False,
            stdin=devnull,
            stdout=devnull,
            stderr=devnull,
        )
def filter(self, filter, node, changelog, patchfile):
    '''arbitrarily rewrite changeset before applying it'''

    self.ui.status(_(b'filtering %s\n') % patchfile)
    user, date, msg = (changelog[1], changelog[2], changelog[4])
    fd, headerfile = pycompat.mkstemp(prefix=b'hg-transplant-')
    fp = os.fdopen(fd, 'wb')
    fp.write(b"# HG changeset patch\n")
    fp.write(b"# User %s\n" % user)
    fp.write(b"# Date %d %d\n" % date)
    fp.write(msg + b'\n')
    fp.close()

    try:
        self.ui.system(
            b'%s %s %s'
            % (
                filter,
                procutil.shellquote(headerfile),
                procutil.shellquote(patchfile),
            ),
            environ={
                b'HGUSER': changelog[1],
                b'HGREVISION': hex(node),
            },
            onerr=error.Abort,
            errprefix=_(b'filter failed'),
            blockedtag=b'transplant_filter',
        )
        user, date, msg = self.parselog(open(headerfile, b'rb'))[1:4]
    finally:
        os.unlink(headerfile)

    return (user, date, msg)
def _getfile(self, tmpfile, filename, hash):
    path = lfutil.findfile(self.remote, hash)
    if not path:
        raise basestore.StoreError(
            filename, hash, self.url, _(b"can't get file locally")
        )
    with open(path, b'rb') as fd:
        return lfutil.copyandhash(util.filechunkiter(fd), tmpfile)
def _load(self):
    """load state from file"""
    if not self.path:
        return
    # use local variables in a loop. CPython uses LOAD_FAST for them,
    # which is faster than both LOAD_CONST and LOAD_GLOBAL.
    flaglen = 1
    hshlen = _hshlen
    with open(self.path, b'rb') as f:
        if f.read(len(self.HEADER)) != self.HEADER:
            raise error.CorruptedFileError()
        self.clear(flush=False)
        while True:
            buf = f.read(flaglen)
            if not buf:
                break
            flag = ord(buf)
            rev = len(self._rev2hsh)
            if flag & renameflag:
                path = self._readcstr(f)
                self._renamerevs.append(rev)
                self._renamepaths.append(path)
            hsh = f.read(hshlen)
            if len(hsh) != hshlen:
                raise error.CorruptedFileError()
            self._hsh2rev[hsh] = rev
            self._rev2flag.append(flag)
            self._rev2hsh.append(hsh)
    self._lastmaxrev = self.maxrev
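# Descriptive sketch of the revmap file layout, inferred from _load above
# and flush earlier (an annotation, not a definitive spec). After HEADER,
# each revision is stored as:
#
#   flag   1 byte                (the renameflag bit means a rename path
#                                 follows)
#   path   NUL-terminated bytes  (present only when the rename bit is set)
#   hsh    _hshlen bytes         (the node hash)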
def readauthormap(ui, authorfile, authors=None):
    if authors is None:
        authors = {}
    with open(authorfile, b'rb') as afile:
        for line in afile:
            line = line.strip()
            if not line or line.startswith(b'#'):
                continue
            try:
                srcauthor, dstauthor = line.split(b'=', 1)
            except ValueError:
                msg = _(b'ignoring bad line in author map file %s: %s\n')
                ui.warn(msg % (authorfile, line.rstrip()))
                continue
            srcauthor = srcauthor.strip()
            dstauthor = dstauthor.strip()
            if authors.get(srcauthor) in (None, dstauthor):
                msg = _(b'mapping author %s to %s\n')
                ui.debug(msg % (srcauthor, dstauthor))
                authors[srcauthor] = dstauthor
                continue
            m = _(b'overriding mapping for author %s, was %s, will be %s\n')
            ui.status(m % (srcauthor, authors[srcauthor], dstauthor))
    return authors
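# Author map format sketch implied by the parser above: one
# `source author = dest author` mapping per line; blank lines and lines
# starting with `#` are ignored. The identities below are illustrative:
#
#   # canonicalize committer names
#   jdoe = John Doe <jdoe@example.com>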
def parsefileblob(path, decompress):
    f = open(path, b"rb")
    try:
        raw = f.read()
    finally:
        f.close()

    if decompress:
        raw = _decompressblob(raw)

    offset, size, flags = shallowutil.parsesizeflags(raw)
    start = offset + size

    firstnode = None

    mapping = {}
    while start < len(raw):
        divider = raw.index(b'\0', start + 80)

        currentnode = raw[start : (start + 20)]
        if not firstnode:
            firstnode = currentnode

        p1 = raw[(start + 20) : (start + 40)]
        p2 = raw[(start + 40) : (start + 60)]
        linknode = raw[(start + 60) : (start + 80)]
        copyfrom = raw[(start + 80) : divider]

        mapping[currentnode] = (p1, p2, linknode, copyfrom)
        start = divider + 1

    return size, firstnode, mapping
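# Descriptive sketch of one history record parsed above, with sizes taken
# from the slice offsets (an annotation, not a definitive spec):
#
#   node      20 bytes
#   p1        20 bytes
#   p2        20 bytes
#   linknode  20 bytes
#   copyfrom  variable length, NUL-terminated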
def readauthormap(self, authorfile):
    afile = open(authorfile, b'rb')
    for line in afile:
        line = line.strip()
        if not line or line.startswith(b'#'):
            continue
        try:
            srcauthor, dstauthor = line.split(b'=', 1)
        except ValueError:
            msg = _(b'ignoring bad line in author map file %s: %s\n')
            self.ui.warn(msg % (authorfile, line.rstrip()))
            continue
        srcauthor = srcauthor.strip()
        dstauthor = dstauthor.strip()
        if self.authors.get(srcauthor) in (None, dstauthor):
            msg = _(b'mapping author %s to %s\n')
            self.ui.debug(msg % (srcauthor, dstauthor))
            self.authors[srcauthor] = dstauthor
            continue
        m = _(b'overriding mapping for author %s, was %s, will be %s\n')
        self.ui.status(m % (srcauthor, self.authors[srcauthor], dstauthor))
    afile.close()
def close(self):
    if self._revmap is not None:
        self._revmap.flush()
        self._revmap = None
    if self._linelog is not None:
        with open(self.linelogpath, b'wb') as f:
            f.write(self._linelog.encode())
        self._linelog = None
def __init__(self, ui, repotype, path=None, revs=None):
    common.converter_source.__init__(self, ui, repotype, path, revs)
    if revs and len(revs) > 1:
        raise error.Abort(
            _(
                b'monotone source does not support specifying '
                b'multiple revs'
            )
        )
    common.commandline.__init__(self, ui, b'mtn')

    self.ui = ui
    self.path = path
    self.automatestdio = False
    self.revs = revs

    norepo = common.NoRepo(
        _(b"%s does not look like a monotone repository") % path
    )
    if not os.path.exists(os.path.join(path, b'_MTN')):
        # Could be a monotone repository (SQLite db file)
        try:
            f = open(path, b'rb')
            header = f.read(16)
            f.close()
        except IOError:
            header = b''
        if header != b'SQLite format 3\x00':
            raise norepo

    # regular expressions for parsing monotone output
    space = br'\s*'
    name = br'\s+"((?:\\"|[^"])*)"\s*'
    value = name
    revision = br'\s+\[(\w+)\]\s*'
    lines = br'(?:.|\n)+'

    self.dir_re = re.compile(space + b"dir" + name)
    self.file_re = re.compile(
        space + b"file" + name + b"content" + revision
    )
    self.add_file_re = re.compile(
        space + b"add_file" + name + b"content" + revision
    )
    self.patch_re = re.compile(
        space + b"patch" + name + b"from" + revision + b"to" + revision
    )
    self.rename_re = re.compile(space + b"rename" + name + b"to" + name)
    self.delete_re = re.compile(space + b"delete" + name)
    self.tag_re = re.compile(space + b"tag" + name + b"revision" + revision)
    self.cert_re = re.compile(
        lines + space + b"name" + name + b"value" + value
    )

    attr = space + b"file" + lines + space + b"attr" + space
    self.attr_execute_re = re.compile(
        attr + b'"mtn:execute"' + space + b'"true"'
    )

    # cached data
    self.manifest_rev = None
    self.manifest = None
    self.files = None
    self.dirs = None

    common.checktool(b'mtn', abort=False)
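# Illustrative revision-text fragments the regexes above are written to
# match (hand-made examples, not captured monotone output):
#
#   add_file "src/foo.c" content [0123456789abcdef0123456789abcdef01234567]
#   rename "src/foo.c" to "src/bar.c"
#   delete "src/old.c"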
def writeauthormap(self):
    authorfile = self.authorfile
    if authorfile:
        self.ui.status(_(b'writing author map file %s\n') % authorfile)
        ofile = open(authorfile, b'wb+')
        for author in self.authors:
            ofile.write(
                util.tonativeeol(b"%s=%s\n" % (author, self.authors[author]))
            )
        ofile.close()
def __init__(self, ui, repotype, path, revs=None):
    super(convert_cvs, self).__init__(ui, repotype, path, revs=revs)

    cvs = os.path.join(path, b"CVS")
    if not os.path.exists(cvs):
        raise NoRepo(_(b"%s does not look like a CVS checkout") % path)

    checktool(b'cvs')

    self.changeset = None
    self.files = {}
    self.tags = {}
    self.lastbranch = {}
    self.socket = None
    self.cvsroot = open(os.path.join(cvs, b"Root"), b'rb').read()[:-1]
    self.cvsrepo = open(os.path.join(cvs, b"Repository"), b'rb').read()[:-1]
    self.encoding = encoding.encoding

    self._connect()
def __setitem__(self, key, value):
    if self.fp is None:
        try:
            self.fp = open(self.path, b'ab')
        except IOError as err:
            raise error.Abort(
                _(b'could not open map file %r: %s')
                % (self.path, encoding.strtolocal(err.strerror))
            )
    self.fp.write(util.tonativeeol(b'%s %s\n' % (key, value)))
    self.fp.flush()
    super(mapfile, self).__setitem__(key, value)
def write(self, data):
    filename = node.hex(hashlib.sha1(data).digest())
    dirpath = self._dirpath(filename)

    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    with open(self._filepath(filename), b'wb') as f:
        f.write(data)

    return filename
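# The store above is content-addressed: the returned name is just the hex
# SHA-1 of the blob, so a caller can recompute it to find the file again.
# A small sketch (the `store` instance is an assumption for illustration):
#
#   name = store.write(b'some blob')
#   assert name == node.hex(hashlib.sha1(b'some blob').digest())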
def copytostoreabsolute(repo, file, hash):
    if inusercache(repo.ui, hash):
        link(usercachepath(repo.ui, hash), storepath(repo, hash))
    else:
        util.makedirs(os.path.dirname(storepath(repo, hash)))
        with open(file, b'rb') as srcf:
            with util.atomictempfile(
                storepath(repo, hash), createmode=repo.store.createmode
            ) as dstf:
                for chunk in util.filechunkiter(srcf):
                    dstf.write(chunk)
        linktousercache(repo, hash)
def link(src, dest):
    """Try to create hardlink - if that fails, efficiently make a copy."""
    util.makedirs(os.path.dirname(dest))
    try:
        util.oslink(src, dest)
    except OSError:
        # if hardlinks fail, fallback on atomic copy
        with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
            for chunk in util.filechunkiter(srcf):
                dstf.write(chunk)
        os.chmod(dest, os.stat(src).st_mode)
def linelog(self):
    if self._linelog is None:
        if os.path.exists(self.linelogpath):
            with open(self.linelogpath, b'rb') as f:
                try:
                    self._linelog = linelogmod.linelog.fromdata(f.read())
                except linelogmod.LineLogError:
                    self._linelog = linelogmod.linelog()
        else:
            self._linelog = linelogmod.linelog()
    return self._linelog
def freememory(self):
    """Unmap and remap the memory to free it up after known expensive
    operations. Return True if self._data and self._index were reloaded.
    """
    if self._index:
        if self._pagedin < self.MAXPAGEDIN:
            return False

        self._index.close()
        self._data.close()

    # TODO: use an opener/vfs to access these paths
    with open(self.indexpath, PACKOPENMODE) as indexfp:
        # memory-map the file, size 0 means whole file
        self._index = mmap.mmap(
            indexfp.fileno(), 0, access=mmap.ACCESS_READ
        )
    with open(self.packpath, PACKOPENMODE) as datafp:
        self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

    self._pagedin = 0
    return True
def getlastnode(path):
    """return the last hash in a revmap, without loading its full content.

    this is equivalent to `m = revmap(path); m.rev2hsh(m.maxrev)`, but
    faster.
    """
    hsh = None
    try:
        with open(path, b'rb') as f:
            f.seek(-_hshlen, io.SEEK_END)
            if f.tell() > len(revmap.HEADER):
                hsh = f.read(_hshlen)
    except IOError:
        pass
    return hsh
def markrepo(self, path):
    """Call this to add the given repo path to the store's list of
    repositories that are using it. This is useful later when doing garbage
    collection, since it allows us to inspect the repos to see what nodes
    they want to be kept alive in the store.
    """
    repospath = os.path.join(self._path, b"repos")
    with open(repospath, b'ab') as reposfile:
        reposfile.write(os.path.dirname(path) + b"\n")

    repospathstat = os.stat(repospath)
    if repospathstat.st_uid == self._uid:
        os.chmod(repospath, 0o0664)
def readfile(path):
    f = open(path, b'rb')
    try:
        result = f.read()

        # we should never have empty files
        if not result:
            os.remove(path)
            raise IOError(b"empty file: %s" % path)

        return result
    finally:
        f.close()
def _getdata(self, name, node):
    filepath = self._getfilepath(name, node)
    try:
        data = shallowutil.readfile(filepath)
        if self._validatecache and not self._validatedata(data, filepath):
            if self._validatecachelog:
                with open(self._validatecachelog, b'ab+') as f:
                    f.write(b"corrupt %s during read\n" % filepath)
            os.rename(filepath, filepath + b".corrupt")
            raise KeyError(b"corrupt local cache file %s" % filepath)
    except IOError:
        raise KeyError(
            b"no file found at %s for %s:%s" % (filepath, name, hex(node))
        )

    return data
def readytofetch(repo):
    """Check that enough time has passed since the last background prefetch.
    This only relates to prefetches after operations that change the
    working copy parent. Default delay between background prefetches is
    2 minutes.
    """
    timeout = repo.ui.configint(b'remotefilelog', b'prefetchdelay')
    fname = repo.vfs.join(b'lastprefetch')

    ready = False
    with open(fname, b'a'):
        # the with construct above is used to avoid race conditions
        modtime = os.path.getmtime(fname)
        if (time.time() - modtime) > timeout:
            os.utime(fname, None)
            ready = True

    return ready
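# Configuration sketch implied by the configint() call above; the value
# is compared against a difference of time.time() results, so it is in
# seconds (120 matching the documented two-minute default):
#
#   [remotefilelog]
#   prefetchdelay = 120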
def parsesplicemap(self, path):
    """check and validate the splicemap format and
    return a child/parents dictionary.
    Format checking has two parts.
    1. generic format which is same across all source types
    2. specific format checking which may be different for
    different source type. This logic is implemented in
    checkrevformat function in source files like
    hg.py, subversion.py etc.
    """
    if not path:
        return {}
    m = {}
    try:
        fp = open(path, b'rb')
        for i, line in enumerate(util.iterfile(fp)):
            line = line.splitlines()[0].rstrip()
            if not line:
                # Ignore blank lines
                continue
            # split line
            lex = common.shlexer(data=line, whitespace=b',')
            line = list(lex)
            # check number of parents
            if not (2 <= len(line) <= 3):
                raise error.Abort(
                    _(
                        b'syntax error in %s(%d): child parent1'
                        b'[,parent2] expected'
                    )
                    % (path, i + 1)
                )
            for part in line:
                self.source.checkrevformat(part)
            child, p1, p2 = line[0], line[1:2], line[2:]
            if p1 == p2:
                m[child] = p1
            else:
                m[child] = p1 + p2
    # if file does not exist or error reading, exit
    except IOError:
        raise error.Abort(
            _(b'splicemap file not found or error reading %s:') % path
        )
    return m
def _cmpsets(l1, l2):
    # NB: `self` is not a parameter here; in the fsmonitor extension this
    # helper is nested inside the status wrapper, so `self` and `self.ui`
    # below come from the enclosing scope.
    try:
        if b'FSMONITOR_LOG_FILE' in encoding.environ:
            fn = encoding.environ[b'FSMONITOR_LOG_FILE']
            f = open(fn, b'wb')
        else:
            fn = b'fsmonitorfail.log'
            f = self.vfs.open(fn, b'wb')
    except (IOError, OSError):
        self.ui.warn(_(b'warning: unable to write to %s\n') % fn)
        return

    try:
        for i, (s1, s2) in enumerate(zip(l1, l2)):
            if set(s1) != set(s2):
                f.write(b'sets at position %d are unequal\n' % i)
                f.write(b'watchman returned: %s\n' % s1)
                f.write(b'stat returned: %s\n' % s2)
    finally:
        f.close()
def _loadfileblob(repo, cachepath, path, node):
    filecachepath = os.path.join(cachepath, path, hex(node))
    if (
        not os.path.exists(filecachepath)
        or os.path.getsize(filecachepath) == 0
    ):
        filectx = repo.filectx(path, fileid=node)
        if filectx.node() == repo.nullid:
            repo.changelog = changelog.changelog(repo.svfs)
            filectx = repo.filectx(path, fileid=node)

        text = createfileblob(filectx)
        # TODO configurable compression engines
        text = zlib.compress(text)

        # everything should be user & group read/writable
        oldumask = os.umask(0o002)
        try:
            dirname = os.path.dirname(filecachepath)
            if not os.path.exists(dirname):
                try:
                    os.makedirs(dirname)
                except OSError as ex:
                    if ex.errno != errno.EEXIST:
                        raise

            f = None
            try:
                f = util.atomictempfile(filecachepath, b"wb")
                f.write(text)
            except (IOError, OSError):
                # Don't abort if the user only has permission to read,
                # and not write.
                pass
            finally:
                if f:
                    f.close()
        finally:
            os.umask(oldumask)
    else:
        with open(filecachepath, b"rb") as f:
            text = f.read()
    return text
def getlfile(repo, proto, sha):
    '''Server command for retrieving a largefile from the repository-local
    cache or user cache.'''
    filename = lfutil.findfile(repo, sha)
    if not filename:
        raise error.Abort(
            _(b'requested largefile %s not present in cache') % sha
        )
    f = open(filename, b'rb')
    length = os.fstat(f.fileno())[6]

    # Since we can't set an HTTP content-length header here, and
    # Mercurial core provides no way to give the length of a streamres
    # (and reading the entire file into RAM would be ill-advised), we
    # just send the length on the first line of the response, like the
    # ssh proto does for string responses.
    def generator():
        yield b'%d\n' % length
        for chunk in util.filechunkiter(f):
            yield chunk

    return wireprototypes.streamreslegacy(gen=generator())
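# Response shape implied by the comment above: a decimal length, a
# newline, then exactly that many raw bytes. A hypothetical client-side
# read (names are illustrative only):
#
#   header = stream.readline()          # b'<length>\n'
#   payload = stream.read(int(header))  # the largefile contents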