def _changesetForRevision(self, revision):
    from datetime import datetime
    from vcpx.changes import Changeset, ChangesetEntry
    from vcpx.tzinfo import FixedOffset

    action_map = {'A': ChangesetEntry.ADDED,
                  'D': ChangesetEntry.DELETED,
                  'M': ChangesetEntry.UPDATED,
                  'R': ChangesetEntry.RENAMED}

    # find parent
    lines = self.repository.runCommand(['rev-list', '--pretty=raw',
                                        '--max-count=1', revision],
                                       GetUpstreamChangesetsFailure)
    parents = []
    user = Changeset.ANONYMOUS_USER
    loglines = []
    date = None
    for line in lines:
        if line.startswith('parent'):
            parents.append(line.split(' ').pop())
        if line.startswith('author'):
            # author line looks like "author Name <email> <timestamp> <tz>"
            author_fields = line.split(' ')[1:]
            tz = int(author_fields.pop())
            dt = int(author_fields.pop())
            user = ' '.join(author_fields)
            # convert the +HHMM/-HHMM offset into seconds east of UTC
            tzsecs = abs(tz)
            tzsecs = (tzsecs / 100 * 60 + tzsecs % 100) * 60
            if tz < 0:
                tzsecs = -tzsecs
            date = datetime.fromtimestamp(dt, FixedOffset(tzsecs / 60))
        if line.startswith(' '):
            loglines.append(line.lstrip(' '))
    message = '\n'.join(loglines)

    entries = []
    cmd = ['diff-tree', '--root', '-r', '-M', '--name-status']
    # haven't thought about merges yet...
    if parents:
        cmd.append(parents[0])
    cmd.append(revision)
    files = self.repository.runCommand(cmd, GetUpstreamChangesetsFailure)[:-1]
    if not parents:
        # git lets us know what it's diffing against if we omit parent
        if len(files) > 0:
            files.pop(0)
    for line in files:
        fields = line.split('\t')
        state = fields.pop(0)
        name = fields.pop()
        e = ChangesetEntry(name)
        e.action_kind = action_map[state[0]]
        if e.action_kind == ChangesetEntry.RENAMED:
            e.old_name = fields.pop()
        entries.append(e)

    return Changeset(revision, date, user, message, entries)
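# Illustrative sketch (not part of the original module): the timezone
# arithmetic above converts git's numeric "+HHMM"/"-HHMM" author offset
# into seconds east of UTC. A hypothetical standalone version, assuming
# Python 2 integer division as in the code above:
def _example_git_tz_to_seconds(tz):
    # e.g. tz=530 ("+0530") -> 19800; tz=-330 ("-0330") -> -12600
    tzsecs = abs(tz)
    tzsecs = (tzsecs / 100 * 60 + tzsecs % 100) * 60
    if tz < 0:
        tzsecs = -tzsecs
    return tzsecs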
def __parse_revision_logs(self, fqrevlist, update=True):
    changesets = []
    logparser = Parser()
    c = ExternalCommand(cwd=self.repository.basedir,
                        command=self.repository.command("cat-archive-log"))
    for fqrev in fqrevlist:
        out, err = c.execute(fqrev, stdout=PIPE, stderr=PIPE)
        if c.exit_status:
            raise GetUpstreamChangesetsFailure(
                "%s returned status %d saying\n%s" %
                (str(c), c.exit_status, err.read()))

        err = None
        try:
            msg = logparser.parse(out)
        except Exception, err:
            pass
        if not err and msg.is_multipart():
            err = "unable to parse log description"
        if not err and update and msg.has_key('Continuation-of'):
            err = "in-version continuations not supported"
        if err:
            raise GetUpstreamChangesetsFailure(str(err))

        date = self.__parse_date(msg['Date'], msg['Standard-date'])
        author = msg['Creator']
        revision = fqrev
        logmsg = [msg['Summary']]
        s = msg.get('Keywords', "").strip()
        if s:
            logmsg.append('Keywords: ' + s)
        s = msg.get_payload().strip()
        if s:
            logmsg.append(s)
        logmsg = '\n'.join(logmsg)
        changesets.append(Changeset(revision, date, author, logmsg))

    return changesets
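# Illustrative sketch (not part of the original source): tla archive logs
# are RFC 822-style messages, which is why the stdlib email parser works
# above. A hypothetical minimal round trip, with made-up header values:
#
#   from email.Parser import Parser
#   from StringIO import StringIO
#   sample = ("Creator: John Doe <[email protected]>\n"
#             "Date: Mon Jan  3 12:00:00 GMT 2005\n"
#             "Standard-date: 2005-01-03 12:00:00 GMT\n"
#             "Summary: fix a typo\n"
#             "\n"
#             "Longer description goes here.\n")
#   msg = Parser().parse(StringIO(sample))
#   assert msg['Summary'] == 'fix a typo'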
def _checkoutUpstreamRevision(self, revision):
    from vcpx.changes import Changeset

    force = False
    if revision == 'INITIAL':
        revision = self.__getNativeChanges(-1)[0]['change']
        force = True
    p4 = self.__getP4()
    desc = p4.describe(revision, shortForm=True)
    p4.sync('@' + str(revision), force=force)
    ts = self.__parseDate(desc['date'])
    return Changeset(revision, ts, desc['user'], desc['description'])
def __collect(self, timestamp, author, changelog, entry, revision):
    """Register a change set about an entry."""

    from vcpx.changes import Changeset

    key = (timestamp, author, changelog)
    if self.changesets.has_key(key):
        cs = self.changesets[key]
        for e in cs.entries:
            if e.name == entry:
                return e
        return cs.addEntry(entry, revision)
    else:
        cs = Changeset(_getGlobalCVSRevision(timestamp, author),
                       timestamp, author, changelog)
        self.changesets[key] = cs
        return cs.addEntry(entry, revision)
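# Illustrative note (not part of the original source): __collect coalesces
# per-file CVS revisions that share the same (timestamp, author, changelog)
# triple into one synthetic changeset. With hypothetical values:
#
#   self.__collect(ts, 'lele', 'Fix typo', 'README', '1.4')
#   self.__collect(ts, 'lele', 'Fix typo', 'setup.py', '1.9')
#
# the second call finds the changeset created by the first under the shared
# key and merely adds a second entry to it.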
def _changesetFromRevision(self, branch, revision_id):
    """
    Generate changeset for the given Bzr revision
    """
    from datetime import datetime
    from vcpx.changes import ChangesetEntry, Changeset
    from vcpx.tzinfo import FixedOffset, UTC

    revision = branch.repository.get_revision(revision_id)
    deltatree = branch.get_revision_delta(
        branch.revision_id_to_revno(revision_id))
    entries = []

    for delta in deltatree.added:
        e = ChangesetEntry(delta[0])
        e.action_kind = ChangesetEntry.ADDED
        entries.append(e)

    for delta in deltatree.removed:
        e = ChangesetEntry(delta[0])
        e.action_kind = ChangesetEntry.DELETED
        entries.append(e)

    for delta in deltatree.renamed:
        e = ChangesetEntry(delta[1])
        e.action_kind = ChangesetEntry.RENAMED
        e.old_name = delta[0]
        entries.append(e)

    for delta in deltatree.modified:
        e = ChangesetEntry(delta[0])
        e.action_kind = ChangesetEntry.UPDATED
        entries.append(e)

    if revision.timezone is not None:
        timezone = FixedOffset(revision.timezone / 60)
    else:
        timezone = UTC

    return Changeset(revision.revision_id,
                     datetime.fromtimestamp(revision.timestamp, timezone),
                     revision.committer,
                     revision.message,
                     entries)
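# Illustrative note (not part of the original source): the numeric indexing
# above relies on bzr's TreeDelta layout, where each element of the added/
# removed/modified lists starts with the path and each element of renamed
# starts with the (old_path, new_path) pair -- hence delta[0] everywhere
# except renames, which take delta[1] as the new name and delta[0] as the
# old one.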
def __parseCvsLog(self, branch, entries, since):
    """Parse a complete CVS log."""

    from os.path import split, join
    from re import compile
    from time import strptime
    from datetime import datetime
    from vcpx.changes import Changeset
    from vcpx.tzinfo import UTC

    revcount_regex = compile('\\bselected revisions:\\s*(\\d+)\\b')

    self.__currentdir = None

    file2rev2tags = {}
    tagcounts = {}
    branchnum = None
    while 1:
        l = self.__readline()
        while l and not l.startswith('RCS file: '):
            l = self.__readline()

        if not l.startswith('RCS file: '):
            break

        assert self.__currentdir is not None, \
               "Missed 'cvs rlog: Logging XX' line"

        entry = join(self.__currentdir, split(l[10:-1])[1][:-2])
        if entries is not None:
            while l and not l.startswith('head: '):
                l = self.__readline()
            assert l, "Missed 'head:' line"

            if branch is None:
                branchnum = normalize_cvs_rev(l[6:-1])
                branchnum = rev2branch(branchnum)

            while l and not l == 'symbolic names:\n':
                l = self.__readline()
            assert l, "Missed 'symbolic names:' line"

            l = self.__readline()
            rev2tags = {}
            while l.startswith('\t'):
                tag, revision = l[1:-1].split(': ')
                tagcounts[tag] = tagcounts.get(tag, 0) + 1
                revision = normalize_cvs_rev(revision)
                rev2tags.setdefault(revision, []).append(tag)
                if tag == branch:
                    branchnum = revision

                l = self.__readline()

            # branchnum may still be None, if this file doesn't exist
            # on the requested branch.

            # filter out branch tags, and tags for revisions that are
            # on other branches.
            for revision in rev2tags.keys():
                if is_branch(revision) or \
                   not branchnum or \
                   not cvs_revs_same_branch(revision, branchnum):
                    del rev2tags[revision]

            file2rev2tags[entry] = rev2tags

        expected_revisions = None
        while l not in (self.inter_sep, self.intra_sep):
            m = revcount_regex.search(l)
            if m is not None:
                expected_revisions = int(m.group(1))
            l = self.__readline()

        last = previous = None
        found_revisions = 0
        while (l <> self.inter_sep
               or not self.__readline(True).startswith('revision ')):
            cs = self.__parseRevision(entry)
            if cs is None:
                break

            date, author, changelog, e, rev, state, newentry = cs

            # CVS seems to sometimes mess up what it thinks the branch is...
            if branchnum and not cvs_revs_same_branch(normalize_cvs_rev(rev),
                                                      branchnum):
                self.log.warning("Skipped revision %s on entry %s "
                                 "as revision didn't match branch revision %s "
                                 "for branch %s" % (str(normalize_cvs_rev(rev)),
                                                    entry, str(branchnum),
                                                    str(branch)))
                expected_revisions -= 1
                continue

            # Skip spurious entries added in a branch
            if not (rev == '1.1' and state == 'dead'
                    and changelog.startswith('file ')
                    and ' was initially added on branch ' in changelog):
                last = self.__collect(date, author, changelog, e, rev)
                if state == 'dead':
                    last.action_kind = last.DELETED
                elif newentry:
                    last.action_kind = last.ADDED
                else:
                    last.action_kind = last.UPDATED
                found_revisions = found_revisions + 1

            if previous and last.action_kind == last.DELETED:
                # For unknown reasons, sometimes there are two dead
                # revisions in a row.
                if previous.action_kind <> last.DELETED:
                    previous.action_kind = previous.ADDED

            previous = last

        if expected_revisions <> found_revisions:
            self.log.warning('Expecting %s revisions, found %s',
                             expected_revisions, found_revisions)

    # If entries is not given, don't try to derive tags information
    if entries is None:
        return

    # Determine the current revision of each live
    # (i.e. non-deleted) entry.
    state = dict(entries.getFileVersions())

    # before stepping through changes, see if the initial state is
    # taggable. If so, add an initial changeset that does nothing
    # but tag, using the date of the last revision tailor imported
    # on its previous run. There's no way to tell when the tag
    # was really applied, so we don't know if it was seen on the
    # last run or not. Before applying the tag on the other end,
    # we'll have to check whether it's already been applied.
    tags = self.__getApplicableTags(state, file2rev2tags, tagcounts)
    if tags:
        if since == None:
            # I think this could only happen if the CVS repo was
            # tagged before any files were added to it. We could
            # probably get a better date by looking at when the
            # files were added, but who cares.
            timestamp = datetime(1900, 1, 1).replace(tzinfo=UTC)
        else:
            # "since" is a revision name read from the state file,
            # which means it was originally generated by
            # getGlobalCVSRevision. The format string "%Y-%m-%d
            # %H:%M:%S" matches the format generated by the implicit
            # call to timestamp.__str__() in getGlobalCVSRevision.
            y, m, d, hh, mm, ss, d1, d2, d3 = strptime(since,
                                                       "%Y-%m-%d %H:%M:%S")
            timestamp = datetime(y, m, d, hh, mm, ss, 0, UTC)
        author = "unknown tagger"
        changelog = "tag %s %s" % (timestamp, tags)
        key = (timestamp, author, changelog)
        self.changesets[key] = Changeset(_getGlobalCVSRevision(timestamp,
                                                               author),
                                         timestamp, author, changelog,
                                         tags=tags)

    # Walk through the changesets, identifying ones that result in
    # a state with a tag. Add that info to the changeset.
    for cs in self.__iter__():
        self.__updateState(state, cs)
        cs.tags = self.__getApplicableTags(state, file2rev2tags, tagcounts)
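# Illustrative note (not part of the original source): the rlog stanzas the
# loop above consumes look roughly like this hypothetical excerpt:
#
#   RCS file: /cvsroot/proj/src/foo.py,v
#   head: 1.4
#   symbolic names:
#           release-1_0: 1.2
#           devel: 1.2.0.2
#   ...
#   total revisions: 4;     selected revisions: 4
#
# The "symbolic names" block feeds file2rev2tags, while "selected revisions"
# sets expected_revisions for the per-file sanity check at the end of the
# loop.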
def endElement(self, name):
    if name == 'logentry':
        # Sort the paths to make tests easier
        self.current['entries'].sort(lambda a, b: cmp(a.name, b.name))

        # Eliminate "useless" entries: SVN does not have atomic
        # renames, but rather uses an ADD+RM duo.
        #
        # So cycle over all entries of this patch, discarding
        # the deletion of files that were actually renamed, and
        # at the same time change related entry from ADDED to
        # RENAMED.

        # When copying a directory from another location in the
        # repository (outside the tracked tree), SVN will report files
        # below this dir that are not being committed as being
        # removed.
        # We thus need to change the action_kind for all entries
        # that are below a dir that was "copyfrom" from a path
        # outside of this module:
        #  D -> Remove entry completely (it's not going to be in here)
        #  (M,A,R) -> A

        mv_or_cp = {}
        for e in self.current['entries']:
            if e.action_kind == e.ADDED and e.old_name is not None:
                mv_or_cp[e.old_name] = e

        def parent_was_copied(n):
            for p in self.copies:
                if n.startswith(p + '/'):
                    return True
            return False

        # Find renames from deleted directories:
        # $ svn mv dir/a.txt a.txt
        # $ svn del dir
        def check_renames_from_dir(name):
            for e in mv_or_cp.values():
                if e.old_name.startswith(name + '/'):
                    e.action_kind = e.RENAMED

        entries = []
        entries2 = []
        for e in self.current['entries']:
            if e.action_kind == e.DELETED:
                if mv_or_cp.has_key(e.name):
                    mv_or_cp[e.name].action_kind = e.RENAMED
                else:
                    check_renames_from_dir(e.name)
                    entries2.append(e)
            elif e.action_kind == 'R':
                # In svn parlance, 'R' means Replaced: a typical
                # scenario is
                #   $ svn mv a.txt b.txt
                #   $ touch a.txt
                #   $ svn add a.txt
                if mv_or_cp.has_key(e.name):
                    mv_or_cp[e.name].action_kind = e.RENAMED
                else:
                    check_renames_from_dir(e.name)
                e.action_kind = e.ADDED
                entries2.append(e)
            elif parent_was_copied(e.name):
                if e.action_kind != e.DELETED:
                    e.action_kind = e.ADDED
                    entries.append(e)
            else:
                entries.append(e)

        # Changes sort: first MODIFY|ADD|RENAME, then REPLACE|DELETE
        for e in entries2:
            entries.append(e)

        svndate = self.current['date']
        # 2004-04-16T17:12:48.000000Z
        y, m, d = map(int, svndate[:10].split('-'))
        hh, mm, ss = map(int, svndate[11:19].split(':'))
        ms = int(svndate[20:-1])
        timestamp = datetime(y, m, d, hh, mm, ss, ms, UTC)

        changeset = Changeset(self.current['revision'],
                              timestamp,
                              self.current.get('author'),
                              self.current['msg'],
                              entries)
        self.changesets.append(changeset)
        self.current = None
    elif name in ['author', 'date', 'msg']:
        self.current[name] = ''.join(self.current_field)
    elif name == 'path':
        path = ''.join(self.current_field)
        entrypath = get_entry_from_path(path)
        if entrypath:
            entry = ChangesetEntry(entrypath)

            if type(self.current_path_action) == type(()):
                self.copies.append(entry.name)
                old = get_entry_from_path(self.current_path_action[1])
                if old:
                    entry.action_kind = self.ACTIONSMAP[
                        self.current_path_action[0]]
                    entry.old_name = old
                    self.renamed[entry.old_name] = True
                else:
                    entry.action_kind = entry.ADDED
            else:
                entry.action_kind = self.ACTIONSMAP[self.current_path_action]

            self.current['entries'].append(entry)
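# Illustrative note (not part of the original source): given a log entry
# reporting
#
#   A  b.txt  (copied from a.txt)
#   D  a.txt
#
# the first pass above stores the ADDED entry in mv_or_cp keyed by its
# old_name 'a.txt'; the second pass then sees the DELETED 'a.txt', flips
# the stored entry to RENAMED and drops the delete, so a single rename
# survives instead of the ADD+RM pair.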
def changesets_from_cvsps(log, sincerev=None):
    """
    Parse CVSps log.
    """

    from datetime import datetime
    from vcpx.changes import Changeset, ChangesetEntry
    from vcpx.repository.cvs import compare_cvs_revs
    from vcpx.tzinfo import UTC

    # cvsps output sample:
    ## ---------------------
    ## PatchSet 1500
    ## Date: 2004/05/09 17:54:22
    ## Author: grubert
    ## Branch: HEAD
    ## Tag: (none)
    ## Log:
    ## Tell the reason for using mbox (not wrapping long lines).
    ##
    ## Members:
    ##         docutils/writers/latex2e.py:1.78->1.79

    l = None
    while 1:
        l = log.readline()
        if l <> '---------------------\n':
            break

        l = log.readline()
        assert l.startswith('PatchSet '), "Parse error: %s" % l

        pset = {}
        pset['revision'] = l[9:-1].strip()
        l = log.readline()
        while not l.startswith('Log:'):
            field, value = l.split(':', 1)
            pset[field.lower()] = value.strip()
            l = log.readline()

        msg = []
        l = log.readline()
        msg.append(l)
        l = log.readline()
        while l <> 'Members: \n':
            msg.append(l)
            l = log.readline()

        assert l.startswith('Members:'), "Parse error: %s" % l

        entries = []
        l = log.readline()
        seen = {}
        while l.startswith('\t'):
            if not sincerev or (sincerev < int(pset['revision'])):
                # Cannot use split here, file may contain ':'
                cpos = l.rindex(':')
                file = l[1:cpos]
                revs = l[cpos + 1:-1]

                fromrev, torev = revs.strip().split('->')

                # Due to the fuzzy mechanism, cvsps may group
                # together two commits on a single entry, thus
                # giving something like:
                #
                #   Normalizer.py:1.12->1.13
                #   Registry.py:1.22->1.23
                #   Registry.py:1.21->1.22
                #   Stopwords.py:1.9->1.10
                #
                # Collapse those into a single one.
                e = seen.get(file)
                if not e:
                    e = ChangesetEntry(file)
                    e.old_revision = fromrev
                    e.new_revision = torev
                    seen[file] = e
                    entries.append(e)
                else:
                    if compare_cvs_revs(e.old_revision, fromrev) > 0:
                        e.old_revision = fromrev

                    if compare_cvs_revs(e.new_revision, torev) < 0:
                        e.new_revision = torev

                if fromrev == 'INITIAL':
                    e.action_kind = e.ADDED
                elif "(DEAD)" in torev:
                    e.action_kind = e.DELETED
                    e.new_revision = torev[:torev.index('(DEAD)')]
                else:
                    e.action_kind = e.UPDATED

            l = log.readline()

        if not sincerev or (sincerev < int(pset['revision'])):
            cvsdate = pset['date']
            y, m, d = map(int, cvsdate[:10].split('/'))
            hh, mm, ss = map(int, cvsdate[11:19].split(':'))
            timestamp = datetime(y, m, d, hh, mm, ss, 0, UTC)
            pset['date'] = timestamp

            yield Changeset(pset['revision'], timestamp, pset['author'],
                            ''.join(msg), entries)
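# Illustrative usage sketch (not part of the original source), assuming a
# cvsps run captured to a hypothetical file "cvsps.out":
#
#   log = open('cvsps.out')
#   for cs in changesets_from_cvsps(log, sincerev=1500):
#       print cs.revision, cs.author, len(cs.entries)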
def _changesetForRevision(self, repo, revision):
    from datetime import datetime
    from vcpx.changes import Changeset, ChangesetEntry
    from vcpx.tzinfo import FixedOffset

    entries = []
    node = self._getNode(repo, revision)
    parents = repo.changelog.parents(node)
    nodecontent = repo.changelog.read(node)
    # hg 0.9.5+ returns a tuple of six elements, last seems useless for us
    (manifest, user, date, files, message) = nodecontent[:5]
    dt, tz = date
    date = datetime.fromtimestamp(dt, FixedOffset(-tz / 60))  # note the minus sign!

    manifest = repo.manifest.read(manifest)

    # To find adds, we get the manifests of any parents. If a file doesn't
    # occur there, it's new.
    pms = {}
    for parent in repo.changelog.parents(node):
        pms.update(repo.manifest.read(repo.changelog.read(parent)[0]))

    # if files contains only '.hgtags', this is probably a tag cset.
    # Tailor appears to only support tagging the current version, so only
    # pass on tags that are for the immediate parents of the current node
    tags = None
    if files == ['.hgtags']:
        tags = [tag for (tag, tagnode) in repo.tags().iteritems()
                if tagnode in parents]

    # Don't include the file itself in the changeset. It's only useful
    # to mercurial, and if we do end up making a tailor round trip
    # the nodes will be wrong anyway.
    if '.hgtags' in files:
        files.remove('.hgtags')
    if pms.has_key('.hgtags'):
        del pms['.hgtags']

    for f in files:
        e = ChangesetEntry(f)
        # find renames
        fl = repo.file(f)
        oldname = f in manifest and fl.renamed(manifest[f])
        if oldname:
            e.action_kind = ChangesetEntry.RENAMED
            e.old_name = oldname[0]
            # hg copy can copy the same file to multiple destinations
            # Currently this is handled as multiple renames. It would
            # probably be better to have ChangesetEntry.COPIED.
            if pms.has_key(oldname[0]):
                pms.pop(oldname[0])
        else:
            if pms.has_key(f):
                e.action_kind = ChangesetEntry.UPDATED
            else:
                e.action_kind = ChangesetEntry.ADDED

        entries.append(e)

    for df in [file for file in pms.iterkeys()
               if not manifest.has_key(file)]:
        e = ChangesetEntry(df)
        e.action_kind = ChangesetEntry.DELETED
        entries.append(e)

    from mercurial.node import hex
    revision = hex(node)
    return Changeset(revision, date, user, message, entries, tags=tags)
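# Illustrative note (not part of the original source): filelog.renamed()
# reports the copy source recorded for a file revision, which is why a
# rename shows up above as a single RENAMED entry; popping oldname[0]
# from pms keeps the trailing deleted-files loop from also emitting a
# DELETED entry for the rename source.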
def __adaptChanges(self, changes):
    from vcpx.changes import Changeset

    # most of the info about a changeset is filled in later
    return [Changeset(str(c['change']), None, c['user'], None)
            for c in changes]
def _parseDarcsPull(self, output):
    """Process 'darcs pull' output to Changesets.
    """

    import re
    from datetime import datetime
    from time import strptime
    from sha import new
    from vcpx.changes import Changeset
    from vcpx.tzinfo import UTC

    l = output.readline()
    while l and not (l.startswith('Would pull the following changes:')
                     or l == 'No remote changes to pull in!\n'):
        l = output.readline()

    if l <> 'No remote changes to pull in!\n':
        ## Sat Jul 17 01:22:08 CEST 2004  lele@nautilus
        ##   * Refix _getUpstreamChangesets for darcs

        fsep = re.compile('[ :]+')
        l = output.readline()
        while not l.startswith('Making no changes: this is a dry run.'):
            # Assume it's a line like
            #   Sun Jan  2 00:24:04 UTC 2005  [email protected]
            # Use a regular expression matching multiple spaces or colons
            # to split it, using the first eight fields to build up a
            # datetime and the ninth as the author.
            pieces = fsep.split(l.rstrip(), 8)
            assert len(pieces) >= 9, "Cannot parse %r as a patch timestamp" % l
            date = ' '.join(pieces[:8])
            author = pieces[8]
            y, m, d, hh, mm, ss, d1, d2, d3 = strptime(date,
                                                       "%a %b %d %H %M %S %Z %Y")
            date = datetime(y, m, d, hh, mm, ss, 0, UTC)
            l = output.readline().rstrip()
            assert (l.startswith('  *') or
                    l.startswith('  UNDO:') or
                    l.startswith('  tagged')), \
                   "Got %r but expected the start of the log" % l

            if l.startswith('  *'):
                name = l[4:]
            else:
                name = l[2:]

            changelog = []
            l = output.readline()
            while l.startswith('  '):
                changelog.append(l[2:-1])
                l = output.readline()

            cset = Changeset(name, date, author, '\n'.join(changelog))

            compactdate = date.strftime("%Y%m%d%H%M%S")
            if name.startswith('UNDO: '):
                name = name[6:]
                inverted = 't'
            else:
                inverted = 'f'

            if name.startswith('tagged '):
                name = name[7:]
                if cset.tags is None:
                    cset.tags = [name]
                else:
                    cset.tags.append(name)
                name = "TAG " + name

            phash = new()
            phash.update(name)
            phash.update(author)
            phash.update(compactdate)
            phash.update(''.join(changelog))
            phash.update(inverted)
            cset.darcs_hash = '%s-%s-%s.gz' % (compactdate,
                                               new(author).hexdigest()[:5],
                                               phash.hexdigest())
            yield cset

            while not l.strip():
                l = output.readline()
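# Illustrative sketch (not part of the original source): how fsep splits a
# darcs timestamp line. With maxsplit=8 the first eight pieces are the date
# fields and the ninth is the author, whatever spaces or colons it follows:
#
#   >>> import re
#   >>> fsep = re.compile('[ :]+')
#   >>> fsep.split('Sun Jan 2 00:24:04 UTC 2005 [email protected]', 8)
#   ['Sun', 'Jan', '2', '00', '24', '04', 'UTC', '2005', '[email protected]']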
def testParsePull(self):
    """Verify basic darcs pull parser behaviour"""

    from vcpx.changes import Changeset

    output = self.getDarcsOutput('darcs-pull_parser_test')
    hashes = self.getDarcsOutput('darcs-pull_parser_test', ext='.hashes')

    class FauxRepository(object):
        name = 'foo'

    dswd = DarcsSourceWorkingDir(FauxRepository())
    results = list(dswd._parseDarcsPull(output))

    expected_changesets = [
        Changeset('Monotone add is no longer recursive by default '
                  '(as of 2006-11-02).',
                  datetime(2006, 12, 12, 05, 30, 20, tzinfo=UTC),
                  '*****@*****.**',
                  'Use add --recursive when adding subtrees.'),
        Changeset('Fix ticket #87',
                  datetime(2006, 12, 14, 23, 45, 04, tzinfo=UTC),
                  'Edgar Alves <*****@*****.**>', ''),
        Changeset("Don't assume the timestamp in darcs log is exactly "
                  "28 chars long",
                  datetime(2006, 11, 17, 20, 26, 28, tzinfo=UTC),
                  '*****@*****.**', ''),
        Changeset('tagged Version 0.9.27',
                  datetime(2006, 12, 11, 21, 07, 48, tzinfo=UTC),
                  '*****@*****.**', ''),
        Changeset('darcs: factor parsing from process invocation in '
                  'DarcsSourceWorkingDir._getUpstreamChangesets',
                  datetime(2007, 1, 6, 1, 52, 50, tzinfo=UTC),
                  'Kevin Turner <*****@*****.**>', ''),
        ]
    for changeset, expected_hash in zip(expected_changesets, hashes):
        changeset.darcs_hash = expected_hash.strip()

    self.failUnlessEqual(len(expected_changesets), len(results))
    for expected, result in zip(expected_changesets, results):
        self.failUnlessEqual(expected, result,
                             "%s != %s" % (expected, result))
        self.failUnlessEqual(expected.darcs_hash, result.darcs_hash,
                             'hash failed for %s\n %s !=\n %s'
                             % (result, expected.darcs_hash,
                                result.darcs_hash))

    output = self.getDarcsOutput('darcs-pull_parser_test2')
    results = list(dswd._parseDarcsPull(output))
    first = results[0]
    self.failUnlessEqual(first.revision,
                         'Added some basic utility functions')
    self.failUnlessEqual(first.date,
                         datetime(2003, 10, 10, 16, 23, 44, tzinfo=UTC))
    self.failUnlessEqual(first.author,
                         'John Goerzen <*****@*****.**>')
    self.failUnlessEqual(first.log,
                         '\n\n([email protected]/'
                         'tla-buildpackage--head--1.0--patch-2)')
    last = results[-1]
    self.failUnlessEqual(last.log,
                         'Keywords:\n\nAdded some code in Python '
                         'to get things going.\n')