def collect_commits(self):
     """Export every input repo with `git fast-export`, memorize each commit
     as JSON (both on disk and via self.remember_commits), and record the
     id offset at which each repo's stream starts.

     Side effects: appends one open export store per repo to
     self.exported_stores, writes .git/splice-commits.json inside each
     input repo, and fills self.export_id_offsets keyed by repo number.
     """
     self.exported_stores = []
     for repo_num, input_repo in enumerate(self.input_repos, start=1):
         repo_name = basename(input_repo)
         # Per-repo cache of already-exported commits, kept inside .git.
         remember_commits_filename = os.path.join(abspath(input_repo), ".git", "splice-commits.json")
         exported_store = self.open_export_file(repo_name, ".git-fast-export", "export", overwrite=True)
         self.exported_stores.append(exported_store)
         logging.info("Doing export on repository %s [%d]", repo_name, repo_num)
         stored_commits = []
         # Re-importing the marks file makes repeated exports incremental.
         export_mark_file = join(abspath(input_repo), ".git", "splice-export-marks")
         export_mark_args = ["--all", "--export-marks=%s" % export_mark_file]
         if exists(export_mark_file):
             export_mark_args.append("--import-marks=%s" % export_mark_file)
         export = fast_export_output(input_repo, export_mark_args)
         logging.info("_next_id is %d", _IDS._next_id)
         try:
             collect = FastExportFilter(reset_callback=lambda r: self.skip_reset(r),
                                        commit_callback=lambda c: stored_commits.append(self.jsonize_commit(c)))
             logging.info("offset for repo %d is %d", repo_num, collect._id_offset)
             kwargs = {}
             # NOTE(review): hard-coded id_offset=8 for the second repo looks
             # like a workaround -- confirm against the incremental-import
             # TODO below.
             if repo_num == 2:
                 kwargs["id_offset"] = 8
             collect.run(export.stdout, exported_store.file, **kwargs)
             exported_store.close()
             with open(remember_commits_filename, 'wb') as remember_commits_file:
                 json.dump(stored_commits, remember_commits_file, indent=4)
         except Exception, e:  # Python 2 syntax; log context, then re-raise
             logging.error("Error in memorizing export for %s [%d]: %s", input_repo, repo_num, e)
             raise
         # TODO: handle the fact that marks are getting messed up by the two repos on an incremental import
         self.remember_commits(repo_num, stored_commits)
         logging.info("id_offset is %d", collect._id_offset)
         self.export_id_offsets[repo_num] = collect._id_offset
def main(args):
    """Rewrite the current repository's fast-export stream according to the
    branch/file rename and exclude rules carried by `args`.

    args is expected to provide branch_renames, branch_pattern_renames,
    branch_excludes, file_renames and file_excludes as produced by the
    argument parser -- TODO confirm against the CLI definition (not visible
    in this file section).
    """
    branch_renames = dict(args.branch_renames)
    branch_pattern_renames = args.branch_pattern_renames
    branch_excludes = dict(args.branch_excludes)
    # A trailing "/" on the source name distinguishes directory rules from
    # plain file rules.
    file_renames = dict([(src, target) for src, target in args.file_renames if not src.endswith("/")])
    dir_renames = [(src, target) for src, target in args.file_renames if src.endswith("/")]
    file_excludes = set([target for target in args.file_excludes if not target.endswith("/")])
    dir_excludes = [target for target in args.file_excludes if target.endswith("/")]
    # Remember what has already been logged so each rename/exclude is
    # reported only once.
    branches_found = set()
    files_found = set()
    def my_commit_callback(commit):
        # Branch handling: pattern renames apply first, then exact renames;
        # exclusion is only checked when no exact rename matched (elif).
        branch = commit.branch
        for src_pattern, target in branch_pattern_renames:
            branch = re.sub(src_pattern, target, branch)
        if branch in branch_renames:
            branch = branch_renames[branch]
        elif branch in branch_excludes:
            if branch not in branches_found:
                logging.info("Excluding branch %s", branch)
                branches_found.add(branch)
            commit.skip()
            return
        if branch != commit.branch:
            if commit.branch not in branches_found:
                logging.info("Renaming branch %s to %s", commit.branch, branch)
                branches_found.add(commit.branch)
            commit.branch = branch
        new_file_changes, alter_commit = [], False
        for change in commit.file_changes:
            exclude_file = False
            filename = change.filename
            # Directory renames rewrite the leading path prefix first ...
            for src_dir, target_dir in dir_renames:
                if filename.startswith(src_dir):
                    filename = filename.replace(src_dir, target_dir, 1)
            for target_dir in dir_excludes:
                if filename.startswith(target_dir):
                    exclude_file = True
            # ... then exact file renames/excludes apply to the (possibly
            # already dir-renamed) path.
            if filename in file_renames:
                filename = file_renames[filename]
            if filename in file_excludes:
                exclude_file = True
            if exclude_file:
                if filename not in files_found:
                    logging.info("Excluding file %s", filename)
                    files_found.add(filename)
                alter_commit = True
            else:
                new_file_changes.append(change)
            if filename != change.filename:
                if change.filename not in files_found:
                    logging.info("Renaming file %s to %s", change.filename, filename)
                    files_found.add(change.filename)
                change.filename = filename
        if alter_commit:
            commit.file_changes = new_file_changes
    filter = FastExportFilter(commit_callback = my_commit_callback)
    filter.run()
Beispiel #3
0
    def run(self):
        """Splice repo1 and repo2 into output_dir via fast-export/fast-import."""
        self.target = fast_import_input(self.output_dir)

        # First pass: remember every commit from the first repository so it
        # can be woven together with the second repo's history.
        export1 = fast_export_output(self.repo1)
        FastExportFilter(
            reset_callback=lambda r: self.skip_reset(r),
            commit_callback=lambda c: self.hold_commit(c),
        ).run(export1.stdout, self.target.stdin)

        # Second pass: interleave the second repository's commits.
        export2 = fast_export_output(self.repo2)
        FastExportFilter(
            commit_callback=lambda c: self.weave_commit(c),
        ).run(export2.stdout, self.target.stdin)

        # Close fast-import's stdin and wait for it to exit.  This is only
        # needed because real file objects were handed to
        # FastExportFilter.run; at worst fast-import would merely outlive
        # this script.
        self.target.stdin.close()
        self.target.wait()
  def run(self):
    """Weave the commits of repo1 and repo2 into a single output repository."""
    self.target = fast_import_input(self.output_dir)

    # Pass 1: hold every commit from the first repository.
    first = fast_export_output(self.repo1)
    holder = FastExportFilter(reset_callback  = lambda r: self.skip_reset(r),
                              commit_callback = lambda c: self.hold_commit(c))
    holder.run(first.stdout, self.target.stdin)

    # Pass 2: weave the second repository's commits among the held ones.
    second = fast_export_output(self.repo2)
    weaver = FastExportFilter(commit_callback = lambda c: self.weave_commit(c))
    weaver.run(second.stdout, self.target.stdin)

    # Close fast-import's input and wait for it to finish; since real file
    # objects were passed to FastExportFilter.run, fast-import could
    # otherwise (harmlessly) outlive this script.
    self.target.stdin.close()
    self.target.wait()
#!/usr/bin/env python

import re

from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter
from datetime import datetime, timedelta

def change_up_them_commits(commit):
  """Clean up one commit during fast-export filtering.

  Renames a mis-attributed author, repairs a typoed email domain, shifts
  the committer date by -5 hours (bad timezone in the initial import), and
  strips an unwanted sentence from the commit message.  Mutates `commit`
  in place.
  """
  # Change the commit author
  if commit.author_name == "Copy N. Paste":
    commit.author_name = "Ima L. Oser"
    commit.author_email = "*****@*****.**"

  # Fix the author email.  The dots must be escaped: the original pattern
  # "@my.crp" would also rewrite unrelated domains such as "@myxcrp".
  commit.author_email = re.sub(r"@my\.crp", "@my.corp", commit.author_email)

  # Fix the committer date (bad timezone conversion in initial import)
  commit.committer_date += timedelta(hours=-5)

  # Fix the commit message
  commit.message = re.sub("Marketing is staffed with pansies", "",
                          commit.message)

# Rewrite the current repository's history in place using the cleanup
# callback above (fast-export -> filter -> fast-import).
filter = FastExportFilter(commit_callback = change_up_them_commits)
filter.run()

def my_blob_callback(blob):
    """Tally one more exported object, then redraw the progress line."""
    global object_count
    object_count = object_count + 1
    print_progress()


def my_commit_callback(commit):
    """Keep only file changes whose path matches `regexp`, relocating each
    kept path under tests/org; drop everything else from the commit."""
    global commit_count
    commit_count += 1
    print_progress()
    kept = []
    for change in commit.file_changes:
        if not regexp.match(change.filename):
            continue
        change.filename = re.sub('^[^/]*/[^/]*/org', 'tests/org',
                                 change.filename)
        kept.append(change)
    commit.file_changes = kept


# Drive the rewrite: blobs and commits stream from source_repo through the
# callbacks above into target_repo.
filter = FastExportFilter(blob_callback=my_blob_callback,
                          commit_callback=my_commit_callback)
filter.run(source_repo, target_repo)

end = datetime.datetime.now()

# Python 2 print statement reporting total wall-clock time.
print "End : " + str(end - start)
  global object_count, commit_count, total_objects, total_commits
  print "\rRewriting commits... %d/%d  (%d objects)" \
        % (commit_count, total_commits, object_count),

def my_blob_callback(blob):
  """Count one exported object and refresh the progress display."""
  global object_count
  object_count = object_count + 1
  print_progress()
  
def my_commit_callback(commit):
  """Filter a commit's file changes down to those matching `regexp`,
  moving each kept path under tests/org."""
  global commit_count
  commit_count += 1
  print_progress()
  kept = []
  for change in commit.file_changes:
    if regexp.match(change.filename):
      change.filename = re.sub('^[^/]*/[^/]*/org', 'tests/org',
                               change.filename)
      kept.append(change)
  commit.file_changes = kept
  
# Stream source_repo through both callbacks above into target_repo.
filter = FastExportFilter(blob_callback   = my_blob_callback,
                          commit_callback = my_commit_callback)
filter.run(source_repo, target_repo)

end = datetime.datetime.now()

# Python 2 print statement reporting total wall-clock time.
print "End : " + str(end-start)

#!/usr/bin/env python

from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter


def my_commit_callback(commit):
    """Relocate every commit recorded on master onto the slave branch."""
    if commit.branch != "refs/heads/master":
        return
    commit.branch = "refs/heads/slave"


# Rewrite the current repository, renaming master to slave via the
# callback above.
filter = FastExportFilter(commit_callback=my_commit_callback)
filter.run()
#!/usr/bin/env python

import re
from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter


def strip_cvs_keywords(blob):
    """Collapse expanded CVS keywords (e.g. "$Id: ... $") back to "$Id$".

    FIXME: Should first check if blob is a text file to avoid ruining
    binaries.  Could use python.magic here, or just output blob.data to
    the unix 'file' command
    """
    keyword = re.compile(
        r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$')
    blob.data = keyword.sub(r'$\1$', blob.data)


# Run the keyword-stripping filter over the current repository's history.
filter = FastExportFilter(blob_callback=strip_cvs_keywords)
filter.run()
#!/usr/bin/env python

from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter

def drop_file_by_contents(blob):
  """Skip any blob whose contents exactly match the forbidden text."""
  forbidden = 'The launch code is 1-2-3-4.'
  if blob.data != forbidden:
    return
  blob.skip()

def drop_files_by_name(commit):
  """Remove every .doc file change from the commit, in place."""
  commit.file_changes = [change for change in commit.file_changes
                         if not change.filename.endswith('.doc')]

# Drop the secret blob and all .doc files while rewriting history in place.
filter = FastExportFilter(blob_callback   = drop_file_by_contents,
                          commit_callback = drop_files_by_name)
filter.run()
Beispiel #11
0
#!/usr/bin/env python

import re

from git_fast_filter import Blob, Reset, FileChanges, Commit, FastExportFilter
from datetime import datetime, timedelta


def change_up_them_commits(commit):
    """Clean up one commit during fast-export filtering.

    Renames a mis-attributed author, repairs a typoed email domain,
    shifts the committer date by -5 hours (bad timezone in the initial
    import), and strips an unwanted sentence from the commit message.
    Mutates `commit` in place.
    """
    # Change the commit author
    if commit.author_name == "Copy N. Paste":
        commit.author_name = "Ima L. Oser"
        commit.author_email = "*****@*****.**"

    # Fix the author email.  Escape the dots: the unescaped pattern
    # "@my.crp" would also rewrite unrelated domains such as "@myxcrp".
    commit.author_email = re.sub(r"@my\.crp", "@my.corp", commit.author_email)

    # Fix the committer date (bad timezone conversion in initial import)
    commit.committer_date += timedelta(hours=-5)

    # Fix the commit message
    commit.message = re.sub("Marketing is staffed with pansies", "",
                            commit.message)


# Rewrite the current repository's history using the cleanup callback above.
filter = FastExportFilter(commit_callback=change_up_them_commits)
filter.run()
  def run(self):
    """Graft the source repo's history into the target repo.

    Streams `git fast-export` output from the source repo through
    FastExportFilter (using self.blob_callback / self.commit_callback),
    feeds the result to `git fast-import` on the target repo, then
    reconciles and records the source/target mark files so later
    incremental runs can resume from the same mark numbers.
    """
  #############################################################################
    # Set members based on data from previous runs
    self._setup_files_and_excludes()

    # Setup the source and target processes. The source process will produce
    # fast-export output for the source repo, this output will be passed
    # through FastExportFilter which will manipulate the output using our
    # callbacks, finally, the manipulated output will be given to the
    # fast-import process and used to create the target repo.
    # (This should update sourcemarks and targetmarks)
    source = \
      fast_export_output(self._source_repo,
                       ["--export-marks=%s" % self._sourcemarks,
                        "--import-marks=%s" % self._sourcemarks]
                       + self._fast_export_args)
    target = \
      fast_import_input( self._target_repo,
                       ["--export-marks=%s" % self._targetmarks,
                        "--import-marks=%s" % self._targetmarks])

    filt = FastExportFilter(blob_callback   = lambda b: self.blob_callback(b),
                            commit_callback = lambda c: self.commit_callback(c))
    filt.run(source.stdout, target.stdin)

    # Show progress
    if self._show_progress:
      sys.stdout.write("\nWaiting for git fast-import to complete...")
      sys.stdout.flush()
    target.stdin.close()
    target.wait() # need to wait for fast-import process to finish
    if self._show_progress:
      sys.stdout.write("done.\n")

    # Record the sourcemarks and targetmarks -- 2 steps

    # Step 1: Make sure the source and target marks have the same mark numbers.
    # Not doing this would allow one end of the grafting to reuse a number
    # that would then be misconnected on the other side.
    sourcemaps = self._get_maps(self._sourcemarks)
    targetmaps = self._get_maps(self._targetmarks)
    # NOTE: under Python 2 (this file uses iteritems below) .keys() returns
    # a list, so deleting entries while iterating is safe here.
    for key in sourcemaps.keys():
      if key not in targetmaps:
        del sourcemaps[key]
    for key in targetmaps.keys():
      if key not in sourcemaps:
        del targetmaps[key]

    # Step 2: Record the data
    for set_obj in [(sourcemaps, self._sourcemarks),
                    (targetmaps, self._targetmarks)]:
      # get raw filename for source/target
      mapname = self._get_map_name(set_obj[1])

      # create refs/collab if it's not there
      if not os.path.isdir(os.path.dirname(mapname)):
        os.mkdir(os.path.dirname(mapname))

      # compute string content of commit-map
      content = ''.join([":%d %s\n" % (k,v) for k,v in set_obj[0].iteritems()])

      # record content in the object database
      record_content(self._collab_git_dir, mapname, content)

    # Check if we are running from the target
    if self._target_repo == '.':
      # Record the excludes and includes so they can be reused next time
      for set_obj in [(self._excludes, 'excludes'),
                      (self._includes, 'includes')]:
        filename = os.path.join(self._collab_git_dir, 'refs',
                                'collab', set_obj[1])
        record_content(self._collab_git_dir, filename,
                       '\n'.join(set_obj[0]) + '\n')

      # Record source_repo as the original repository
      filename = os.path.join(self._collab_git_dir, 'refs',
                              'collab', 'orig_repo')
      record_content(self._collab_git_dir, filename, self._source_repo+'\n')
    def run(self):
        """Graft the source repo's history into the target repo.

        Streams `git fast-export` output from the source repo through
        FastExportFilter (using self.blob_callback / self.commit_callback),
        feeds the result to `git fast-import` on the target repo, then
        reconciles and records the source/target mark files so later
        incremental runs can resume from the same mark numbers.
        """
        #############################################################################
        # Set members based on data from previous runs
        self._setup_files_and_excludes()

        # Setup the source and target processes. The source process will produce
        # fast-export output for the source repo, this output will be passed
        # through FastExportFilter which will manipulate the output using our
        # callbacks, finally, the manipulated output will be given to the
        # fast-import process and used to create the target repo.
        # (This should update sourcemarks and targetmarks)
        source = \
          fast_export_output(self._source_repo,
                           ["--export-marks=%s" % self._sourcemarks,
                            "--import-marks=%s" % self._sourcemarks]
                           + self._fast_export_args)
        target = \
          fast_import_input( self._target_repo,
                           ["--export-marks=%s" % self._targetmarks,
                            "--import-marks=%s" % self._targetmarks])

        filt = FastExportFilter(
            blob_callback=lambda b: self.blob_callback(b),
            commit_callback=lambda c: self.commit_callback(c))
        filt.run(source.stdout, target.stdin)

        # Show progress
        if self._show_progress:
            sys.stdout.write("\nWaiting for git fast-import to complete...")
            sys.stdout.flush()
        target.stdin.close()
        target.wait()  # need to wait for fast-import process to finish
        if self._show_progress:
            sys.stdout.write("done.\n")

        # Record the sourcemarks and targetmarks -- 2 steps

        # Step 1: Make sure the source and target marks have the same mark numbers.
        # Not doing this would allow one end of the grafting to reuse a number
        # that would then be misconnected on the other side.
        sourcemaps = self._get_maps(self._sourcemarks)
        targetmaps = self._get_maps(self._targetmarks)
        # NOTE: under Python 2 (this file uses iteritems below) .keys()
        # returns a list, so deleting entries while iterating is safe here.
        for key in sourcemaps.keys():
            if key not in targetmaps:
                del sourcemaps[key]
        for key in targetmaps.keys():
            if key not in sourcemaps:
                del targetmaps[key]

        # Step 2: Record the data
        for set_obj in [(sourcemaps, self._sourcemarks),
                        (targetmaps, self._targetmarks)]:
            # get raw filename for source/target
            mapname = self._get_map_name(set_obj[1])

            # create refs/collab if it's not there
            if not os.path.isdir(os.path.dirname(mapname)):
                os.mkdir(os.path.dirname(mapname))

            # compute string content of commit-map
            content = ''.join(
                [":%d %s\n" % (k, v) for k, v in set_obj[0].iteritems()])

            # record content in the object database
            record_content(self._collab_git_dir, mapname, content)

        # Check if we are running from the target
        if self._target_repo == '.':
            # Record the excludes and includes so they can be reused next time
            for set_obj in [(self._excludes, 'excludes'),
                            (self._includes, 'includes')]:
                filename = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                        set_obj[1])
                record_content(self._collab_git_dir, filename,
                               '\n'.join(set_obj[0]) + '\n')

            # Record source_repo as the original repository
            filename = os.path.join(self._collab_git_dir, 'refs', 'collab',
                                    'orig_repo')
            record_content(self._collab_git_dir, filename,
                           self._source_repo + '\n')