Example #1
def add_file_commit(entity, file_, repo, log, git_name, git_mail, agent):
    log.ok('add_file_commit(%s, %s, %s, %s, %s, %s, %s)' % (entity, file_, repo, log, git_name, git_mail, agent))
    staged = dvcs.list_staged(repo)
    modified = dvcs.list_modified(repo)
    if staged and not modified:
        log.ok('All files staged.')
        log.ok('Updating changelog')
        path = file_.path_abs.replace('{}/'.format(entity.path), '')
        changelog_messages = ['Added entity file {}'.format(path)]
        if agent:
            changelog_messages.append('@agent: %s' % agent)
        changelog.write_changelog_entry(
            entity.changelog_path, changelog_messages, git_name, git_mail)
        log.ok('git add %s' % entity.changelog_path_rel)
        git_files = [entity.changelog_path_rel]
        dvcs.stage(repo, git_files)
        
        log.ok('Committing')
        commit = dvcs.commit(repo, 'Added entity file(s)', agent)
        log.ok('commit: {}'.format(commit.hexsha))
        committed = dvcs.list_committed(repo, commit)
        committed.sort()
        log.ok('files committed:')
        for f in committed:
            log.ok('| %s' % f)
        
    else:
        log.not_ok('%s files staged, %s files modified' % (len(staged), len(modified)))
        log.not_ok('staged %s' % staged)
        log.not_ok('modified %s' % modified)
        log.not_ok('Cannot commit!')
        raise Exception('Could not commit because of %s unstaged files: %s' % (len(modified), modified))
    return file_, repo, log
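
A hedged usage sketch of the function above; `entity`, `file_`, `repo`, and `log` stand in for the DDR model objects and AddFileLogger used throughout these examples, and the name/mail/agent values are purely illustrative:

    # Hypothetical call: commit a file that was staged earlier.
    file_, repo, log = add_file_commit(
        entity, file_, repo, log,
        git_name='gitname', git_mail='gitmail@example.com', agent='ddr-cmdln')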
Example #3
def stage_files(entity, git_files, annex_files, new_files, log, show_staged=True):
    # TODO move to DDR.dvcs?
    repo = dvcs.repository(entity.collection_path)
    log.ok('| repo %s' % repo)
    # These vars will be used to determine if stage operation is successful.
    # If called in batch operation there may already be staged files.
    # stage_planned   Files added/modified by this function call
    # stage_already   Files that were already staged
    # stage_predicted List of staged files that should result from this operation.
    # stage_new       Files that are being added.
    stage_planned = git_files + annex_files
    stage_already = dvcs.list_staged(repo)
    stage_predicted = predict_staged(stage_already, stage_planned)
    stage_new = [x for x in stage_planned if x not in stage_already]
    log.ok('| %s files to stage:' % len(stage_planned))
    for sp in stage_planned:
        log.ok('|   %s' % sp)
    stage_ok = False
    staged = []
    try:
        log.ok('git stage')
        dvcs.stage(repo, git_files)
        log.ok('annex stage')
        dvcs.annex_stage(repo, annex_files)
        log.ok('ok')
        staged = dvcs.list_staged(repo)
    except Exception:
        # FAILED! print traceback to addfile log
        log.not_ok(traceback.format_exc().strip())
    finally:
        if show_staged:
            log.ok('| %s files staged:' % len(staged))
            log.ok('show_staged %s' % show_staged)
            for sp in staged:
                log.ok('|   %s' % sp)
        if len(staged) == len(stage_predicted):
            log.ok('| %s files staged (%s new, %s already staged)' % (
                len(staged), len(stage_new), len(stage_already))
            )
            stage_ok = True
        else:
            log.not_ok('%s files staged (should be %s)' % (
                len(staged), len(stage_predicted))
            )
        if not stage_ok:
            log.not_ok('File staging aborted. Cleaning up')
            # try to pick up the pieces
            # mv files back to tmp_dir
            # TODO Properly clean up git-annex-added files.
            #      This clause moves the *symlinks* to annex files but leaves
            #      the actual binaries in the .git/annex objects dir.
            for tmp, dest in new_files:
                if os.path.islink(dest):
                    log.not_ok('| link (not moving) %s' % dest)
                else:
                    log.not_ok('| mv %s %s' % (dest, tmp))
                    os.rename(dest, tmp)
            log.not_ok('finished cleanup. good luck...')
            log.crash('Add file aborted, see log file for details: %s' % log.logpath)
    return repo
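
`predict_staged` is not shown on this page; a minimal sketch consistent with how it is used above (the union of already-staged and planned paths, deduplicated), offered as an assumption rather than the actual DDR implementation:

    def predict_staged(already, planned):
        """Predict the staged-file list that should result from this operation.
        Sketch: union of already-staged and planned paths, order preserved."""
        predicted = list(already)
        for path in planned:
            if path not in predicted:
                predicted.append(path)
        return predicted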
Example #4
def file_destroy(user_name,
                 user_mail,
                 collection,
                 entity,
                 rm_files,
                 updated_files,
                 agent='',
                 commit=True):
    """Remove file and metadata
    
    - check that paths exist, etc
    - intantiate collection, repo objects
    - remove entity dir
    - update control and changelog
    - commit everything
    
    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param collection: Collection
    @param entity: Entity
    @param rm_files: List of paths to files to delete (relative to entity files dir).
    @param updated_files: List of paths to updated file(s), relative to entitys.
    @param agent: (optional) Name of software making the change.
    @param commit: (optional) Commit files after staging them.
    @return: exit,message,touched_files ('ok' if successful)
    """
    repo = dvcs.repository(collection.path, user_name, user_mail)
    repo.git.checkout('master')
    dvcs.remote_add(repo, collection.git_url, config.GIT_REMOTE_NAME)

    # updated file paths are relative to collection root
    git_files = list(updated_files)

    # remove the files
    # NOTE: File must be removed from filesystem at this point
    # so the File will be properly removed from the control file
    for f in rm_files:
        repo.git.rm('-rf', f)

    # update entity changelog
    changelog_files = [
        # don't list access files in changelog
        # TODO use a models.File function to ID the original file
        f for f in rm_files if ('-a.jpg' not in f) and ('.json' not in f)
    ]
    changelog_messages = [
        'Deleted file {}'.format(os.path.basename(f)) for f in changelog_files
    ]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    write_changelog_entry(entity.changelog_path, changelog_messages, user_name,
                          user_mail)

    git_files.append(entity.changelog_path_rel)
    dvcs.stage(repo, git_files)
    if commit:
        commit_obj = dvcs.commit(repo, 'Deleted file(s)', agent)
    return 0, 'ok', git_files
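
A hedged usage sketch; `collection` and `entity` stand in for DDR model objects, and the file paths are purely illustrative:

    # Hypothetical call: delete one binary and stage the updated entity JSON.
    exit_code, msg, touched = file_destroy(
        'gitname', 'gitmail@example.com', collection, entity,
        rm_files=['files/ddr-test-123-1-master-abc123.tif'],
        updated_files=['files/ddr-test-123-1/entity.json'],
        agent='ddr-cmdln', commit=True)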
Example #5
def stage_files(entity, git_files, annex_files, log, show_staged=True):
    """Stage files; check before and after to ensure all files get staged

    @param entity: DDR.models.entities.Entity
    @param git_files: list
    @param annex_files: list
    @param log: AddFileLogger
    @param show_staged: bool
    @returns: repo
    """
    repo = dvcs.repository(entity.collection_path)
    log.ok('| repo %s' % repo)

    # Remove any files in git_files that are in annex_files
    git_files = [
        path for path in git_files
        if path not in annex_files
    ]
    
    log.ok('| BEFORE staging')
    staged_before, modified_before, untracked_before = repo_status(repo, log)
    
    stage_these = sorted(list(set(git_files + annex_files)))
    log.ok('| staging %s files:' % len(stage_these))
    for path in stage_these:
        log.ok('|   %s' % path)
    
    stage_ok = False
    staged = []
    try:
        log.ok('| annex stage')
        # Stage annex files (binaries) before non-binary git files
        # else binaries might end up in .git/objects/ which would be NOT GOOD
        dvcs.annex_stage(repo, annex_files)
        log.ok('| git stage')
        # If git_files contains binaries they are already staged by now.
        dvcs.stage(repo, git_files)
        log.ok('| ok')
    except Exception:
        # FAILED! print traceback to addfile log
        log.not_ok(traceback.format_exc().strip())
        
    log.ok('| AFTER staging')
    staged_after, modified_after, untracked_after = repo_status(repo, log)
    
    # Crash if not staged
    still_modified = [path for path in stage_these if path in modified_after]
    if still_modified:
        log.not_ok('These files are still modified')
        for path in still_modified:
            log.not_ok('| %s' % path)
        log.crash('Add file aborted, see log file for details: %s' % log.logpath)
    
    return repo
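
`repo_status` is not shown on this page; a minimal sketch consistent with its use above (log and return the staged, modified, and untracked path lists), built on the same dvcs helpers plus GitPython's untracked_files attribute. This is an assumption, not the actual DDR helper:

    def repo_status(repo, log):
        """Log and return (staged, modified, untracked) paths (sketch)."""
        staged = dvcs.list_staged(repo)
        modified = dvcs.list_modified(repo)
        untracked = repo.untracked_files
        log.ok('| staged %s' % len(staged))
        log.ok('| modified %s' % len(modified))
        log.ok('| untracked %s' % len(untracked))
        return staged, modified, untracked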
Example #6
def entity_destroy(user_name,
                   user_mail,
                   entity,
                   updated_files,
                   agent='',
                   commit=True):
    """Command-line function for creating an entity and adding it to the collection.
    
    - check that paths exist, etc
    - intantiate collection, repo objects
    - remove entity dir
    - update control and changelog
    - commit everything
    
    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param entity: Entity
    @param updated_files: List of paths to updated file(s), relative to entitys.
    @param agent: (optional) Name of software making the change.
    @param commit: (optional) Commit files after staging them.
    @return: message ('ok' if successful)
    """
    collection = entity.collection()
    parent = entity.identifier.parent().object()
    repo = dvcs.repository(collection.path_abs, user_name, user_mail)
    repo.git.checkout('master')
    dvcs.remote_add(repo, collection.git_url, config.GIT_REMOTE_NAME)
    git_files = updated_files

    # remove entity directory
    # NOTE: entity files must be removed at this point so the entity will be
    # properly removed from the control file
    repo.git.rm('-rf', entity.path_abs)

    # prep collection log entries
    changelog_messages = [
        'Deleted entity {}'.format(entity.id),
    ]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    commit_message = dvcs.compose_commit_message(changelog_messages[0],
                                                 agent=agent)

    # collection changelog
    write_changelog_entry(parent.changelog_path,
                          changelog_messages,
                          user=user_name,
                          email=user_mail)
    git_files.append(parent.changelog_path)
    dvcs.stage(repo, git_files)
    # commit
    if commit:
        repo = commit_files(repo, commit_message, git_files)
    return 0, 'ok'
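
A hedged usage sketch; `entity` stands in for a DDR Entity whose parent collection is checked out locally:

    # Hypothetical call: destroy the entity and commit the changelog update.
    exit_code, msg = entity_destroy(
        'gitname', 'gitmail@example.com', entity,
        updated_files=[], agent='ddr-cmdln', commit=True)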
Example #7
def entity_destroy(user_name, user_mail, entity, updated_files, agent='', commit=True):
    """Command-line function for creating an entity and adding it to the collection.
    
    - check that paths exist, etc
    - intantiate collection, repo objects
    - remove entity dir
    - update control and changelog
    - commit everything
    
    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param entity: Entity
    @param updated_files: List of paths to updated file(s), relative to entitys.
    @param agent: (optional) Name of software making the change.
    @param commit: (optional) Commit files after staging them.
    @return: message ('ok' if successful)
    """
    collection = entity.collection()
    parent = entity.identifier.parent().object()
    repo = dvcs.repository(collection.path_abs, user_name, user_mail)
    repo.git.checkout('master')
    dvcs.remote_add(repo, collection.git_url, config.GIT_REMOTE_NAME)
    git_files = updated_files
    
    # remove entity directory
    # NOTE: entity files must be removed at this point so the entity will be
    # properly removed from the control file
    repo.git.rm('-rf', entity.path_abs)
    
    # prep collection log entries
    changelog_messages = ['Deleted entity {}'.format(entity.id),]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    commit_message = dvcs.compose_commit_message(changelog_messages[0], agent=agent)
    
    # collection changelog
    write_changelog_entry(parent.changelog_path,
                          changelog_messages,
                          user=user_name, email=user_mail)
    git_files.append(parent.changelog_path)
    dvcs.stage(repo, git_files)
    # commit
    if commit:
        repo = commit_files(repo, commit_message, git_files)
    return 0, 'ok'
Example #8
def test_import_entities(tmpdir, collection, test_csv_dir, test_files_dir):
    entity_csv_path = os.path.join(test_csv_dir, 'ddrimport-entity-new.csv')
    out = batch.Importer.import_entities(entity_csv_path,
                                         collection.identifier, VOCABS_URL,
                                         GIT_USER, GIT_MAIL, AGENT)
    print(out)
    out_ids = [o.id for o in out]
    assert out_ids == EXPECTED_ENTITY_IDS
    # save and commit
    git_files = []
    for o in out:
        exit, status, updated_files = o.save('pytest',
                                             '*****@*****.**',
                                             'pytest',
                                             collection=collection,
                                             commit=False)
        print(o, status)
        git_files += updated_files
    repo = dvcs.repository(collection.path_abs)
    dvcs.stage(repo, git_files)
    commit = repo.index.commit('test_import_entities')
Example #9
def import_files(csv_path, cidentifier, vocabs_path, git_name, git_mail, agent, log_path=None, dryrun=False):
    """Adds or updates files from a CSV file
    
    TODO how to handle excluded fields like XMP???
    
    @param csv_path: Absolute path to CSV data file.
    @param cidentifier: Identifier
    @param vocabs_path: Absolute path to vocab dir
    @param git_name: str
    @param git_mail: str
    @param agent: str
    @param log_path: str Absolute path to addfile log for all files
    @param dryrun: boolean
    """
    logging.info('batch import files ----------------------------')
    
    # TODO hard-coded model name...
    model = 'file'
    
    csv_dir = os.path.dirname(csv_path)
    logging.debug('csv_dir %s' % csv_dir)

    # TODO this still knows too much about entities and files...
    entity_class = identifier.class_for_name(
        identifier.MODEL_CLASSES['entity']['module'],
        identifier.MODEL_CLASSES['entity']['class']
    )
    logging.debug('entity_class %s' % entity_class)
    
    logging.info('Reading %s' % csv_path)
    headers,rowds = csvfile.make_rowds(fileio.read_csv(csv_path))
    logging.info('%s rows' % len(rowds))
    
    # check for modified or uncommitted files in repo
    repository = dvcs.repository(cidentifier.path_abs())
    logging.debug(repository)

    fidentifiers = {
        rowd['id']: identifier.Identifier(
            id=rowd['id'],
            base_path=cidentifier.basepath
        )
        for rowd in rowds
    }
    fidentifier_parents = {
        fi.id: Importer._fidentifier_parent(fi)
        for fi in fidentifiers.values()
    }
    # eidentifiers, removing duplicates
    eidentifiers = list(set(fidentifier_parents.values()))
    entities = {}
    bad_entities = []
    for eidentifier in eidentifiers:
        if os.path.exists(eidentifier.path_abs()):
            entity = eidentifier.object()
            entities[eidentifier.id] = entity
        else:
            if eidentifier.id not in bad_entities:
                bad_entities.append(eidentifier.id)
    if bad_entities:
        for f in bad_entities:
            logging.error('    %s missing' % f)
        raise Exception('%s entities could not be loaded! - IMPORT CANCELLED!' % len(bad_entities))

    # separate into new and existing lists
    rowds_new = []
    rowds_existing = []
    for n,rowd in enumerate(rowds):
        if Importer._file_is_new(fidentifiers[rowd['id']]):
            rowds_new.append(rowd)
        else:
            rowds_existing.append(rowd)
    
    logging.info('- - - - - - - - - - - - - - - - - - - - - - - -')
    logging.info('Updating existing files')
    start_updates = datetime.now()
    git_files = []
    updated = []
    elapsed_rounds_updates = []
    staged = []
    obj_metadata = None
    for n,rowd in enumerate(rowds_existing):
        logging.info('+ %s/%s - %s (%s)' % (n+1, len(rowds), rowd['id'], rowd['basename_orig']))
        start_round = datetime.now()
        
        fidentifier = fidentifiers[rowd['id']]
        eidentifier = fidentifier_parents[fidentifier.id]
        entity = entities[eidentifier.id]
        file_ = fidentifier.object()
        modified = file_.load_csv(rowd)
        # Getting obj_metadata takes about 1sec each time
        # TODO caching works as long as all objects have same metadata...
        if not obj_metadata:
            obj_metadata = models.object_metadata(
                fidentifier.fields_module(),
                repository.working_dir
            )
        
        if dryrun:
            pass
        elif modified:
            logging.debug('    writing %s' % file_.json_path)
            file_.write_json(obj_metadata=obj_metadata)
            # TODO better to write to collection changelog?
            Importer._write_entity_changelog(entity, git_name, git_mail, agent)
            # stage
            git_files.append(file_.json_path_rel)
            git_files.append(entity.changelog_path_rel)
            updated.append(file_)
        
        elapsed_round = datetime.now() - start_round
        elapsed_rounds_updates.append(elapsed_round)
        logging.debug('| %s (%s)' % (fidentifier, elapsed_round))
    
    elapsed_updates = datetime.now() - start_updates
    logging.debug('%s updated in %s' % (len(elapsed_rounds_updates), elapsed_updates))
    
    if dryrun:
        pass
    elif git_files:
        logging.info('Staging %s modified files' % len(git_files))
        start_stage = datetime.now()
        dvcs.stage(repository, git_files)
        staged = util.natural_sort(dvcs.list_staged(repository))
        for path in staged:
            if path in git_files:
                logging.debug('+ %s' % path)
            else:
                logging.debug('| %s' % path)
        elapsed_stage = datetime.now() - start_stage
        logging.debug('ok (%s)' % elapsed_stage)
        logging.debug('%s staged in %s' % (len(staged), elapsed_stage))
    
    logging.info('- - - - - - - - - - - - - - - - - - - - - - - -')
    logging.info('Adding new files')
    start_adds = datetime.now()
    elapsed_rounds_adds = []
    logging.info('Checking source files')
    for rowd in rowds_new:
        rowd['src_path'] = os.path.join(csv_dir, rowd['basename_orig'])
        logging.debug('| %s' % rowd['src_path'])
        if not os.path.exists(rowd['src_path']):
            raise Exception('Missing file: %s' % rowd['src_path'])
    if log_path:
        logging.info('addfile logging to %s' % log_path)
    for n,rowd in enumerate(rowds_new):
        logging.info('+ %s/%s - %s (%s)' % (n+1, len(rowds), rowd['id'], rowd['basename_orig']))
        start_round = datetime.now()
        
        fidentifier = fidentifiers[rowd['id']]
        eidentifier = fidentifier_parents[fidentifier.id]
        entity = entities[eidentifier.id]
        logging.debug('| %s' % (entity))

        file_ = None  # ensure defined when dryrun skips the ingest below
        if dryrun:
            pass
        elif Importer._file_is_new(fidentifier):
            # ingest
            # TODO make sure this updates entity.files
            file_,repo2,log2 = ingest.add_file(
                entity,
                rowd['src_path'],
                fidentifier.parts['role'],
                rowd,
                git_name, git_mail, agent,
                log_path=log_path,
                show_staged=False
            )
        
        elapsed_round = datetime.now() - start_round
        elapsed_rounds_adds.append(elapsed_round)
        logging.debug('| %s (%s)' % (file_, elapsed_round))
    
    elapsed_adds = datetime.now() - start_adds
    logging.debug('%s added in %s' % (len(elapsed_rounds_adds), elapsed_adds))
    logging.info('- - - - - - - - - - - - - - - - - - - - - - - -')
    
    return git_files
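
A hedged usage sketch; `cidentifier` stands in for a collection Identifier and the paths are illustrative. Running with dryrun=True exercises the CSV and source-file checks without writing or staging anything:

    # Hypothetical dry run over a prepared CSV of file records.
    git_files = import_files(
        '/tmp/ddr-test-123/files-import.csv', cidentifier, '/opt/ddr-vocabs',
        'gitname', 'gitmail@example.com', 'ddr-cmdln',
        log_path='/tmp/addfile.log', dryrun=True)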
Example #10
def import_entities(csv_path, cidentifier, vocabs_path, git_name, git_mail, agent, dryrun=False):
    """Adds or updates entities from a CSV file
    
    Running function multiple times with the same CSV file is idempotent.
    After the initial pass, files will only be modified if the CSV data
    has been updated.
    
    This function writes and stages files but does not commit them!
    That is left to the user or to another function.
    
    @param csv_path: Absolute path to CSV data file.
    @param cidentifier: Identifier
    @param vocabs_path: Absolute path to vocab dir
    @param git_name: str
    @param git_mail: str
    @param agent: str
    @param dryrun: boolean
    @returns: list of updated entities
    """
    logging.info('------------------------------------------------------------------------')
    logging.info('batch import entity')
    model = 'entity'
    
    repository = dvcs.repository(cidentifier.path_abs())
    logging.info(repository)
    
    logging.info('Reading %s' % csv_path)
    headers,rowds = csvfile.make_rowds(fileio.read_csv(csv_path))
    logging.info('%s rows' % len(rowds))
    
    logging.info('- - - - - - - - - - - - - - - - - - - - - - - -')
    logging.info('Importing')
    start_updates = datetime.now()
    git_files = []
    updated = []
    elapsed_rounds = []
    obj_metadata = None
    
    if dryrun:
        logging.info('Dry run - no modifications')
    for n,rowd in enumerate(rowds):
        logging.info('%s/%s - %s' % (n+1, len(rowds), rowd['id']))
        start_round = datetime.now()
        
        eidentifier = identifier.Identifier(id=rowd['id'], base_path=cidentifier.basepath)
        # if there is an existing object it will be loaded
        entity = eidentifier.object()
        if not entity:
            entity = models.Entity.create(eidentifier.path_abs(), eidentifier)
        modified = entity.load_csv(rowd)
        # Getting obj_metadata takes about 1sec each time
        # TODO caching works as long as all objects have same metadata...
        if not obj_metadata:
            obj_metadata = models.object_metadata(
                eidentifier.fields_module(),
                repository.working_dir
            )
        
        if dryrun:
            pass
        elif modified:
            # write files
            if not os.path.exists(entity.path_abs):
                os.makedirs(entity.path_abs)
            logging.debug('    writing %s' % entity.json_path)
            entity.write_json(obj_metadata=obj_metadata)
            # TODO better to write to collection changelog?
            # TODO write all additions to changelog at one time
            Importer._write_entity_changelog(entity, git_name, git_mail, agent)
            # stage
            git_files.append(entity.json_path_rel)
            git_files.append(entity.changelog_path_rel)
            updated.append(entity)
        
        elapsed_round = datetime.now() - start_round
        elapsed_rounds.append(elapsed_round)
        logging.debug('| %s (%s)' % (eidentifier, elapsed_round))

    if dryrun:
        logging.info('Dry run - no modifications')
    elif updated:
        logging.info('Staging %s modified files' % len(git_files))
        start_stage = datetime.now()
        dvcs.stage(repository, git_files)
        for path in util.natural_sort(dvcs.list_staged(repository)):
            if path in git_files:
                logging.debug('+ %s' % path)
            else:
                logging.debug('| %s' % path)
        elapsed_stage = datetime.now() - start_stage
        logging.debug('ok (%s)' % elapsed_stage)
    
    elapsed_updates = datetime.now() - start_updates
    logging.debug('%s updated in %s' % (len(elapsed_rounds), elapsed_updates))
    logging.info('- - - - - - - - - - - - - - - - - - - - - - - -')
    
    return updated
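
A hedged sketch of the idempotency the docstring claims: a second run over an unchanged CSV should find nothing modified and return an empty list. Names and paths here are illustrative:

    # Hypothetical back-to-back runs with the same CSV.
    updated = import_entities(csv_path, cidentifier, vocabs_path,
                              'gitname', 'gitmail@example.com', 'ddr-cmdln')
    rerun = import_entities(csv_path, cidentifier, vocabs_path,
                            'gitname', 'gitmail@example.com', 'ddr-cmdln')
    assert rerun == []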
Example #11
def file_destroy(user_name, user_mail, collection, entity, rm_files, updated_files, agent='', commit=True):
    """Remove file and metadata
    
    - check that paths exist, etc
    - intantiate collection, repo objects
    - remove entity dir
    - update control and changelog
    - commit everything
    
    @param user_name: Username for use in changelog, git log
    @param user_mail: User email address for use in changelog, git log
    @param collection: Collection
    @param entity: Entity
    @param rm_files: List of paths to files to delete (relative to entity files dir).
    @param updated_files: List of paths to updated file(s), relative to entitys.
    @param agent: (optional) Name of software making the change.
    @param commit: (optional) Commit files after staging them.
    @return: exit,message,touched_files ('ok' if successful)
    """
    repo = dvcs.repository(collection.path, user_name, user_mail)
    repo.git.checkout('master')
    dvcs.remote_add(repo, collection.git_url, config.GIT_REMOTE_NAME)
    
    # updated file paths are relative to collection root
    git_files = [os.path.join('files', entity.id, f) for f in updated_files]
    
    # remove the files
    # NOTE: File must be removed from filesystem at this point
    # so the File will be properly removed from the control file
    for f in rm_files:
        repo.git.rm('-rf', f)
    
    # update entity control
    econtrol = entity.control()
    econtrol.update_checksums(entity)
    econtrol.write()
    git_files.append(econtrol.path_rel)
    
    # update entity changelog
    changelog_files = [
        # don't list access files in changelog
        # TODO use a models.File function to ID the original file
        f for f in rm_files
        if ('-a.jpg' not in f) and ('.json' not in f)
    ]
    changelog_messages = [
        'Deleted file {}'.format(os.path.basename(f))
        for f in changelog_files
    ]
    if agent:
        changelog_messages.append('@agent: %s' % agent)
    write_changelog_entry(
        entity.changelog_path,
        changelog_messages,
        user_name, user_mail
    )
    
    git_files.append(entity.changelog_path_rel)
    dvcs.stage(repo, git_files)
    if commit:
        commit_obj = dvcs.commit(repo, 'Deleted file(s)', agent)
    return 0, 'ok', git_files
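
The TODO above asks for a models.File helper instead of substring matching; a minimal sketch of that filter, assuming the '-a.jpg' access-copy and '.json' metadata naming conventions used in these examples (hypothetical helper, not part of DDR.models):

    def is_original_file(path):
        """True if path is neither an access copy nor a metadata file (sketch)."""
        return not (path.endswith('-a.jpg') or path.endswith('.json'))

    changelog_files = [f for f in rm_files if is_original_file(f)]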
Example #12
def stage_files(entity,
                git_files,
                annex_files,
                new_files,
                log,
                show_staged=True):
    # TODO move to DDR.dvcs?
    repo = dvcs.repository(entity.collection_path)
    log.ok('| repo %s' % repo)
    # These vars will be used to determine if stage operation is successful.
    # If called in batch operation there may already be staged files.
    # stage_planned   Files added/modified by this function call
    # stage_already   Files that were already staged
    # stage_predicted List of staged files that should result from this operation.
    # stage_new       Files that are being added.
    stage_planned = git_files + annex_files
    stage_already = dvcs.list_staged(repo)
    stage_predicted = predict_staged(stage_already, stage_planned)
    stage_new = [x for x in stage_planned if x not in stage_already]
    log.ok('| %s files to stage:' % len(stage_planned))
    for sp in stage_planned:
        log.ok('|   %s' % sp)
    stage_ok = False
    staged = []
    try:
        log.ok('git stage')
        dvcs.stage(repo, git_files)
        log.ok('annex stage')
        dvcs.annex_stage(repo, annex_files)
        log.ok('ok')
        staged = dvcs.list_staged(repo)
    except Exception:
        # FAILED! print traceback to addfile log
        log.not_ok(traceback.format_exc().strip())
    finally:
        if show_staged:
            log.ok('| %s files staged:' % len(staged))
            log.ok('show_staged %s' % show_staged)
            for sp in staged:
                log.ok('|   %s' % sp)
        if len(staged) == len(stage_predicted):
            log.ok('| %s files staged (%s new, %s already staged)' %
                   (len(staged), len(stage_new), len(stage_already)))
            stage_ok = True
        else:
            log.not_ok('%s files staged (should be %s)' %
                       (len(staged), len(stage_predicted)))
        if not stage_ok:
            log.not_ok('File staging aborted. Cleaning up')
            # try to pick up the pieces
            # mv files back to tmp_dir
            # TODO Properly clean up git-annex-added files.
            #      This clause moves the *symlinks* to annex files but leaves
            #      the actual binaries in the .git/annex objects dir.
            for tmp, dest in new_files:
                if os.path.islink(dest):
                    log.not_ok('| link (not moving) %s' % dest)
                else:
                    log.not_ok('| mv %s %s' % (dest, tmp))
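                    # NOTE: shutil.move falls back to copy-then-delete, so
                    # unlike the os.rename used in the Example #3 variant it
                    # also works when tmp and dest are on different filesystems.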
                    shutil.move(dest, tmp)
            log.not_ok('finished cleanup. good luck...')
            log.crash('Add file aborted, see log file for details: %s' %
                      log.logpath)
    return repo