Beispiel #1
0
def set_annex_description(repo, annex_status=None, description=None, force=False):
    """Set the repo's git-annex description if it does not already have one.

    NOTE: This needs the output of git annex status, which takes some time
    to produce.

    New repo: git annex init "REPONAME"
    Existing repo: git annex describe here "REPONAME"

    Descriptions should be chosen/generated based on the following heuristic:
    - Input to description argument of function.
    - If on USB device, the drive label of the device.
    - Hostname of machine, unless it is pnr (used by partner VMs).
    - If hostname is pnr, pnr:DOMAIN where DOMAIN is the domain portion of the git config user.email

    When no description argument is given, the drive label, hostname and
    user.email are gathered here and handed to _make_annex_description,
    which produces the actual string.

    @param repo: A GitPython Repo object
    @param annex_status: (optional) Output of "git annex status" (saves some time).
    @param description: Manually supply a new description.
    @param force: Boolean; any truthy value applies a new description even if one already exists.
    @return String description if new one was created/applied or None
    """
    PARTNER_HOSTNAME = 'pnr'
    existing = get_annex_description(repo, annex_status)
    # Keep the existing description unless the caller forces a replacement.
    if existing and not force:
        return None
    if description:
        desc = description
    else:
        # Gather drive label, hostname and email, then generate a description.
        desc = _make_annex_description(
            drive_label=storage.drive_label(repo.working_dir),
            hostname=socket.gethostname(),
            partner_host=PARTNER_HOSTNAME,
            mail=repo.git.config('user.email'),
        )
    if desc:
        # Apply the description to this repository ("here").
        logging.debug('git annex describe here %s', desc)
        repo.git.annex('describe', 'here', desc)
    return desc
Beispiel #2
0
def sync(user_name, user_mail, collection):
    """Synchronize a collection repo with its remote on the hub server.

    Background carried over from earlier revisions of this function:
    git-annex has a "sync" command for communicating annex changes between
    repositories, but it is designed for use between non-bare repositories.
    Git normally does not support pushing to non-bare repositories, so
    git-annex uses "synced/BRANCH" branches to make that work.
    Reference: http://git-annex.branchable.com/sync/

    When git-annex-sync is used between a non-bare repo and a bare repo
    (e.g. a local repo and the hub server running Gitolite), the
    "synced/master" branches do not get merged in to master and syncing
    stops working.  This was tried in commit 1857a7aa3f and reverted in
    favor of manual syncing.

    NOTE(review): despite the history above, the body below does run
    "git annex sync" -- confirm which behavior is intended.

    Steps performed: open the repo, set its annex description, add the
    collection's git URL as the configured remote, log the remotes, then
    run "git annex sync" and log its output.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param collection: Collection (its path and git_url are used)
    @return: tuple (0, 'ok')
    """
    repository = dvcs.repository(collection.path, user_name, user_mail)
    repo_msg = 'repo: %s' % repository
    logging.debug(repo_msg)

    # describe this annex, then make sure the hub remote is registered
    label = storage.drive_label(repository.working_dir)
    status = dvcs.annex_status(repository)
    dvcs.annex_set_description(repository, status, drive_label=label)
    dvcs.remote_add(repository, collection.git_url, config.GIT_REMOTE_NAME)

    # list remotes
    logging.debug('remotes')
    for entry in dvcs.remotes(repository):
        name, target = entry['name'], entry['target']
        logging.debug('- %s %s' % (name, target))

    # sync
    logging.debug('git annex sync')
    logging.debug(repository.git.annex('sync'))
    return (0, 'ok')
Beispiel #3
0
def clone(user_name, user_mail, identifier, dest_path):
    """Command-line function for cloning an existing collection.

    Clones the collection repo from the URL built from config.GITOLITE and
    identifier.id into dest_path, runs "git annex init" when the annex
    path is absent, checks out master, applies git and annex configs,
    sets the annex description and registers the remote.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param identifier: Identifier (its id and path_abs() are used)
    @param dest_path: str
    @return: tuple (0, 'ok') on success, or (1, message) if the clone
        failed or .git/ is missing
    """
    url = '{}:{}.git'.format(config.GITOLITE, identifier.id)
    repository = git.Repo.clone_from(url, dest_path)
    logging.debug('    git clone {}'.format(url))

    # guard: nothing else can happen without a repo object
    if not repository:
        logging.error('    COULD NOT CLONE!')
        return (1, 'could not clone')
    logging.debug('    OK')

    # guard: the clone must have produced a .git/ directory
    # NOTE(review): this checks identifier.path_abs('git'), not dest_path --
    # confirm the two always refer to the same location.
    if not os.path.exists(identifier.path_abs('git')):
        logging.error('    .git/ IS MISSING!')
        return (1, '.git/ is missing')
    logging.debug('    .git/ is present')

    # git annex init if not already existing
    annex_path = identifier.path_abs('annex')
    if not os.path.exists(annex_path):
        logging.debug('    git annex init')
        repository.git.annex('init')

    repository.git.checkout('master')
    dvcs.git_set_configs(repository, user_name, user_mail)
    dvcs.annex_set_configs(repository, user_name, user_mail)

    label = storage.drive_label(repository.working_dir)
    status = dvcs.annex_status(repository)
    dvcs.annex_set_description(repository, status, drive_label=label)
    dvcs.remote_add(repository, url, config.GIT_REMOTE_NAME)
    return (0, 'ok')
Beispiel #4
0
def sync(user_name, user_mail, collection):
    """Synchronize a collection repo with its remote on the hub server.

    Background carried over from earlier revisions of this function:
    git-annex has a "sync" command for communicating annex changes between
    repositories, but it is designed for use between non-bare repositories.
    Git normally does not support pushing to non-bare repositories, so
    git-annex uses "synced/BRANCH" branches to make that work.
    Reference: http://git-annex.branchable.com/sync/

    When git-annex-sync is used between a non-bare repo and a bare repo
    (e.g. a local repo and the hub server running Gitolite), the
    "synced/master" branches do not get merged in to master and syncing
    stops working.  This was tried in commit 1857a7aa3f and reverted in
    favor of manual syncing.

    NOTE(review): despite the history above, the body below does run
    "git annex sync" -- confirm which behavior is intended.

    Steps performed: open the repo, set its annex description, add the
    collection's git URL as the configured remote, log the remotes, then
    run "git annex sync" and log its output.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param collection: Collection (its path and git_url are used)
    @return: tuple (0, 'ok')
    """
    repository = dvcs.repository(collection.path, user_name, user_mail)
    repo_msg = 'repo: %s' % repository
    logging.debug(repo_msg)

    # describe this annex, then make sure the hub remote is registered
    label = storage.drive_label(repository.working_dir)
    status = dvcs.annex_status(repository)
    dvcs.annex_set_description(repository, status, drive_label=label)
    dvcs.remote_add(repository, collection.git_url, config.GIT_REMOTE_NAME)

    # list remotes
    logging.debug('remotes')
    for entry in dvcs.remotes(repository):
        name, target = entry['name'], entry['target']
        logging.debug('- %s %s' % (name, target))

    # sync
    logging.debug('git annex sync')
    logging.debug(repository.git.annex('sync'))
    return (0, 'ok')
Beispiel #5
0
def clone(user_name, user_mail, identifier, dest_path):
    """Command-line function for cloning an existing collection.

    Clones the collection repo from the URL built from config.GITOLITE and
    identifier.id into dest_path, runs "git annex init" when the annex
    path is absent, checks out master, applies git and annex configs,
    sets the annex description and registers the remote.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param identifier: Identifier (its id and path_abs() are used)
    @param dest_path: str
    @return: tuple (0, 'ok') on success, or (1, message) if the clone
        failed or .git/ is missing
    """
    url = '{}:{}.git'.format(config.GITOLITE, identifier.id)
    repository = git.Repo.clone_from(url, dest_path)
    logging.debug('    git clone {}'.format(url))

    # guard: nothing else can happen without a repo object
    if not repository:
        logging.error('    COULD NOT CLONE!')
        return (1, 'could not clone')
    logging.debug('    OK')

    # guard: the clone must have produced a .git/ directory
    # NOTE(review): this checks identifier.path_abs('git'), not dest_path --
    # confirm the two always refer to the same location.
    if not os.path.exists(identifier.path_abs('git')):
        logging.error('    .git/ IS MISSING!')
        return (1, '.git/ is missing')
    logging.debug('    .git/ is present')

    # git annex init if not already existing
    annex_path = identifier.path_abs('annex')
    if not os.path.exists(annex_path):
        logging.debug('    git annex init')
        repository.git.annex('init')

    repository.git.checkout('master')
    dvcs.git_set_configs(repository, user_name, user_mail)
    dvcs.annex_set_configs(repository, user_name, user_mail)

    label = storage.drive_label(repository.working_dir)
    status = dvcs.annex_status(repository)
    dvcs.annex_set_description(repository, status, drive_label=label)
    dvcs.remote_add(repository, url, config.GIT_REMOTE_NAME)
    return (0, 'ok')
Beispiel #6
0
def create(user_name, user_mail, identifier, templates, agent=''):
    """Command-line function for creating a new collection.

    Clones a blank collection object from workbench server, adds files, commits.

    - clones new repo from gitolite server
    # Easier to have Gitolite create repo then clone (http://sitaramc.github.com/gitolite/repos.html)
    # than to add existing to Gitolite (http://sitaramc.github.com/gitolite/rare.html#existing).
    local requests CID from workbench API
    background:collection init: $ collection -cCID -oinit]
    background:collection init: $ git clone git@mits:ddr-ORG-C
        $ git clone git@mits:ddr-densho-1
        Cloning into 'ddr-densho-1'...
        Initialized empty Git repository in /home/git/repositories/ddr-densho-1.git/
        warning: You appear to have cloned an empty repository.
    background:entity init: $ git annex init
    background:entity init: $ git add changelog control ead.xml .gitignore
    background:entity init: $ git commit

    Sequence in this function: refuse if the collection ID is already known
    to Gitolite, clone, add the remote and configs, copy templates, write
    control/.gitignore/changelog, commit, "git annex init", push the
    git-annex and master branches, then set the annex description.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param identifier: Identifier
    @param templates: List of metadata templates (absolute paths).
    @param agent: (optional) Name of software making the change.
    @return: tuple (0, 'ok') on success, or (1, message) if the clone
        failed or .git/ is missing
    @raise Exception: if identifier.id is already listed by Gitolite
    """
    gitolite = dvcs.Gitolite(config.GITOLITE)
    gitolite.initialize()
    if identifier.id in gitolite.collections():
        raise Exception("'%s' already exists -- clone instead." % identifier.id)
    git_url = '{}:{}.git'.format(config.GITOLITE, identifier.id)
    repo = git.Repo.clone_from(git_url, identifier.path_abs())
    logging.debug('    git clone {}'.format(git_url))
    # Every later step needs a usable clone, so stop here rather than
    # logging the problem and failing somewhere less obvious.
    if not repo:
        logging.error('    COULD NOT CLONE!')
        return 1, 'could not clone'
    logging.debug('    OK')
    if not os.path.exists(identifier.path_abs('git')):
        logging.error('    .git/ IS MISSING!')
        return 1, '.git/ is missing'
    logging.debug('    .git/ is present')
    # there is no master branch at this point
    dvcs.remote_add(repo, git_url, config.GIT_REMOTE_NAME)
    dvcs.git_set_configs(repo, user_name, user_mail)
    dvcs.annex_set_configs(repo, user_name, user_mail)
    git_files = []

    # copy template files to collection; templates that do not exist are skipped
    for src in templates:
        if os.path.exists(src):
            dst = os.path.join(identifier.path_abs(), os.path.basename(src))
            logging.debug('cp %s, %s' % (src, dst))
            shutil.copy(src, dst)
            if os.path.exists(dst):
                git_files.append(dst)
            else:
                logging.error('COULD NOT COPY %s' % src)

    # instantiate now that we have collection dir and some templates
    object_class = identifier.object_class()
    collection = object_class(identifier.path_abs())

    # add control, .gitignore, changelog
    control = collection.control()
    # Return value is not needed; the call is kept because it presumably
    # creates the .gitignore file checked below -- TODO confirm.
    collection.gitignore()

    # prep log entries
    changelog_messages = ['Initialized collection {}'.format(collection.id)]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    commit_message = dvcs.compose_commit_message(changelog_messages[0], agent=agent)

    write_changelog_entry(collection.changelog_path,
                          changelog_messages,
                          user_name, user_mail)
    # Stage whichever metadata files exist; a missing one is logged but
    # does not abort the commit.
    if os.path.exists(control.path):
        git_files.append(control.path_rel)
    else:
        logging.error('    COULD NOT CREATE control')
    if os.path.exists(collection.gitignore_path):
        git_files.append(collection.gitignore_path_rel)
    else:
        logging.error('    COULD NOT CREATE .gitignore')
    if os.path.exists(collection.changelog_path):
        git_files.append(collection.changelog_path_rel)
    else:
        logging.error('    COULD NOT CREATE changelog')

    # add files and commit
    repo = commit_files(repo, commit_message, git_files, [])
    # master branch should be created by this point
    # git annex init
    logging.debug('    git annex init')
    repo.git.annex('init')
    if os.path.exists(os.path.join(collection.path, '.git', 'annex')):
        logging.debug('    .git/annex/ OK')
    else:
        logging.error('    .git/annex/ IS MISSING!')

    # manual version of git-annex-sync - see notes for DDR.commands.sync.
    logging.debug('git push %s git-annex' % config.GIT_REMOTE_NAME)
    repo.git.checkout('git-annex')
    repo.git.push(config.GIT_REMOTE_NAME, 'git-annex')
    logging.debug('git push %s master' % config.GIT_REMOTE_NAME)
    repo.git.checkout('master')
    repo.git.push(config.GIT_REMOTE_NAME, 'master')
    logging.debug('OK')

    drive_label = storage.drive_label(repo.working_dir)
    dvcs.annex_set_description(repo, dvcs.annex_status(repo), drive_label=drive_label)
    return 0, 'ok'
Beispiel #7
0
def create(user_name, user_mail, identifier, templates, agent=''):
    """Command-line function for creating a new collection.

    Clones a blank collection object from workbench server, adds files, commits.

    - clones new repo from gitolite server
    # Easier to have Gitolite create repo then clone (http://sitaramc.github.com/gitolite/repos.html)
    # than to add existing to Gitolite (http://sitaramc.github.com/gitolite/rare.html#existing).
    local requests CID from workbench API
    background:collection init: $ collection -cCID -oinit]
    background:collection init: $ git clone git@mits:ddr-ORG-C
        $ git clone git@mits:ddr-densho-1
        Cloning into 'ddr-densho-1'...
        Initialized empty Git repository in /home/git/repositories/ddr-densho-1.git/
        warning: You appear to have cloned an empty repository.
    background:entity init: $ git annex init
    background:entity init: $ git add changelog control ead.xml .gitignore
    background:entity init: $ git commit

    Sequence in this function: refuse if the collection ID is already known
    to Gitolite, clone, add the remote and configs, copy templates, write
    control/.gitignore/changelog, commit, "git annex init", push the
    git-annex and master branches, then set the annex description.

    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param identifier: Identifier
    @param templates: List of metadata templates (absolute paths).
    @param agent: (optional) Name of software making the change.
    @return: tuple (0, 'ok') on success, or (1, message) if the clone
        failed or .git/ is missing
    @raise Exception: if identifier.id is already listed by Gitolite
    """
    gitolite = dvcs.Gitolite(config.GITOLITE)
    gitolite.initialize()
    if identifier.id in gitolite.collections():
        raise Exception("'%s' already exists -- clone instead." % identifier.id)
    git_url = '{}:{}.git'.format(config.GITOLITE, identifier.id)
    repo = git.Repo.clone_from(git_url, identifier.path_abs())
    logging.debug('    git clone {}'.format(git_url))
    # Every later step needs a usable clone, so stop here rather than
    # logging the problem and failing somewhere less obvious.
    if not repo:
        logging.error('    COULD NOT CLONE!')
        return 1, 'could not clone'
    logging.debug('    OK')
    if not os.path.exists(identifier.path_abs('git')):
        logging.error('    .git/ IS MISSING!')
        return 1, '.git/ is missing'
    logging.debug('    .git/ is present')
    # there is no master branch at this point
    dvcs.remote_add(repo, git_url, config.GIT_REMOTE_NAME)
    dvcs.git_set_configs(repo, user_name, user_mail)
    dvcs.annex_set_configs(repo, user_name, user_mail)
    git_files = []

    # copy template files to collection; templates that do not exist are skipped
    for src in templates:
        if os.path.exists(src):
            dst = os.path.join(identifier.path_abs(), os.path.basename(src))
            logging.debug('cp %s, %s' % (src, dst))
            shutil.copy(src, dst)
            if os.path.exists(dst):
                git_files.append(dst)
            else:
                logging.error('COULD NOT COPY %s' % src)

    # instantiate now that we have collection dir and some templates
    object_class = identifier.object_class()
    collection = object_class(identifier.path_abs())

    # add control, .gitignore, changelog
    control = collection.control()
    # Return value is not needed; the call is kept because it presumably
    # creates the .gitignore file checked below -- TODO confirm.
    collection.gitignore()

    # prep log entries
    changelog_messages = ['Initialized collection {}'.format(collection.id)]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    commit_message = dvcs.compose_commit_message(changelog_messages[0], agent=agent)

    write_changelog_entry(collection.changelog_path,
                          changelog_messages,
                          user_name, user_mail)
    # Stage whichever metadata files exist; a missing one is logged but
    # does not abort the commit.
    if os.path.exists(control.path):
        git_files.append(control.path_rel)
    else:
        logging.error('    COULD NOT CREATE control')
    if os.path.exists(collection.gitignore_path):
        git_files.append(collection.gitignore_path_rel)
    else:
        logging.error('    COULD NOT CREATE .gitignore')
    if os.path.exists(collection.changelog_path):
        git_files.append(collection.changelog_path_rel)
    else:
        logging.error('    COULD NOT CREATE changelog')

    # add files and commit
    repo = commit_files(repo, commit_message, git_files, [])
    # master branch should be created by this point
    # git annex init
    logging.debug('    git annex init')
    repo.git.annex('init')
    if os.path.exists(os.path.join(collection.path, '.git', 'annex')):
        logging.debug('    .git/annex/ OK')
    else:
        logging.error('    .git/annex/ IS MISSING!')

    # manual version of git-annex-sync - see notes for DDR.commands.sync.
    logging.debug('git push %s git-annex' % config.GIT_REMOTE_NAME)
    repo.git.checkout('git-annex')
    repo.git.push(config.GIT_REMOTE_NAME, 'git-annex')
    logging.debug('git push %s master' % config.GIT_REMOTE_NAME)
    repo.git.checkout('master')
    repo.git.push(config.GIT_REMOTE_NAME, 'master')
    logging.debug('OK')

    drive_label = storage.drive_label(repo.working_dir)
    dvcs.annex_set_description(repo, dvcs.annex_status(repo), drive_label=drive_label)
    return 0, 'ok'