Beispiel #1
0
def validate_datalad_config(store, dataset):
    """Ensure the dataset has a committed .datalad/config file.

    Looks the file up at HEAD; git_show raising KeyError means the path
    is absent from the tree, in which case the config is generated and
    committed.
    """
    path = store.get_dataset_path(dataset)
    try:
        git_show(path, 'HEAD', '.datalad/config')
    except KeyError:
        # Not present at HEAD -- create it and commit the new file.
        create_datalad_config(path)
        commit_files(store, dataset, ['.datalad/config'])
Beispiel #2
0
    def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
        """Serve one file from a dataset, or an index of its files.

        With `filename`: annexed files are streamed from the local annex
        object store; plain git files are returned inline from the tree at
        `snapshot`. Without `filename`: responds with a JSON list of file
        objects ({name, path, size}), optionally the untracked files when
        the `untracked` query parameter is present.
        """
        ds_path = self.store.get_dataset_path(dataset)
        if filename:
            try:
                ds = self.store.get_dataset(dataset)
                if ds.repo.is_under_annex([filename])[0]:
                    path = git_show(ds_path, snapshot + ':' + filename)
                    # remove leading relative folder paths
                    fd = path[path.find('.git/annex'):]

                    # If open() fails, the file content is not present in the
                    # local annex (it lives on s3), so the IOError handler
                    # below sends the client a 404.
                    fd = open(os.path.join(ds_path, fd), 'rb')
                    resp.stream = fd
                    resp.stream_len = os.fstat(fd.fileno()).st_size
                    resp.status = falcon.HTTP_OK
                else:
                    resp.body = git_show(ds_path, snapshot + ':' + filename)
                    resp.status = falcon.HTTP_OK
            except CalledProcessError:
                # File is not present in tree
                resp.media = {'error': 'file not found in git tree'}
                resp.status = falcon.HTTP_NOT_FOUND
            except IOError:
                # File is not kept locally
                resp.media = {'error': 'file not found'}
                resp.status = falcon.HTTP_NOT_FOUND
            except Exception:
                # Some unknown error. Narrowed from a bare `except:` so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                resp.media = {
                    'error': 'an unknown error occurred accessing this file'
                }
                resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
                self.logger.exception(
                    'An unknown error processing file "{}"'.format(filename))
        else:
            # Request for index of files
            # Return a list of file objects
            # {name, path, size}
            try:
                if "untracked" in req.params:
                    files = get_untracked_files(self.store, dataset)
                else:
                    files = get_files(self.store, dataset, snapshot)
                resp.media = {'files': files}
            except Exception:
                # Previously a silent bare `except:`; log so failures are
                # diagnosable, and keep the 500 status for the client.
                self.logger.exception(
                    'An unknown error listing files for "{}"'.format(dataset))
                resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
Beispiel #3
0
def update_description(store,
                       dataset,
                       description_fields,
                       name=None,
                       email=None):
    """Merge fields into dataset_description.json, enforce CC0, and commit.

    :param store: object providing get_dataset / get_dataset_path
    :param dataset: dataset identifier passed through to the store
    :param description_fields: mapping of description fields to merge; may
        be None or empty, in which case nothing is written unless a
        License correction is folded in below
    :param name: unused in this body -- presumably committer name; TODO confirm
    :param email: unused in this body -- presumably committer email; TODO confirm
    :returns: the updated description dict when an edit was made,
        otherwise the existing description parsed from HEAD
    :raises Exception: if the working-tree file differs from the HEAD copy
    """
    ds = store.get_dataset(dataset)
    # Read the committed version first so concurrent edits to the working
    # tree can be detected before overwriting (checked below).
    description = git_show(ds.path, 'HEAD:dataset_description.json')
    description_json = json.loads(description)
    if description_json.get('License') != 'CC0':
        # Force License to CC0 by folding it into the requested edits.
        # NOTE(review): assumes edit_description merges the second mapping
        # into the first and tolerates description_fields being None --
        # confirm against its definition.
        description_fields = edit_description(description_fields,
                                              {'License': 'CC0'})
    if description_fields is not None and any(description_fields):
        updated = edit_description(description_json, description_fields)
        path = os.path.join(store.get_dataset_path(dataset),
                            'dataset_description.json')
        with open(path, 'r+', encoding='utf-8') as description_file:
            description_file_contents = description_file.read()
            if description != description_file_contents:
                # Working tree no longer matches HEAD -- refuse to clobber
                # uncommitted changes.
                raise Exception('unexpected dataset_description.json contents')
            description_file.seek(0)
            description_file.truncate(0)
            description_file.write(json.dumps(updated, indent=4))
        # Commit new content, run validator
        commit_files(store, dataset, ['dataset_description.json'])
        return updated
    else:
        return description_json
Beispiel #4
0
def check_remote_has_version(dataset_path, remote, tag):
    """Return True if <tag> is the most recent export to <remote>.

    Compares the remote uuid and git tree id recorded in git-annex's
    export.log against the uuid advertised for <remote> by git-annex and
    the tree id the tag points at. Any parse failure (no matching log
    line, missing tag) surfaces as AttributeError and yields False.
    """
    try:
        info = get_tag_info(dataset_path, tag)
        remotes = info.get('repositories containing these files', [])
        remote_repo = [
            r for r in remotes if r.get('description') == f'[{remote}]'
        ]
        # Falsy (empty list) when no remote matched; otherwise the uuid
        # string (or None if the matched entry has no 'uuid' key).
        remote_id_A = remote_repo and remote_repo[0].get('uuid')

        # extract remote uuid and associated git tree id from `git show git-annex:export.log`
        # this command only logs the latest export. previously exported tags will not show
        export_log = git_show(dataset_path, 'git-annex', 'export.log')
        log_remote_id_pattern = re.compile(':(.+) .+$')
        # search() returns None when the log does not match; the .group()
        # call then raises AttributeError, handled below as "no export".
        match = log_remote_id_pattern.search(export_log)
        remote_id_B = match.group(1)
        log_tree_id_pattern = re.compile('.* (.+)$')
        match = log_tree_id_pattern.search(export_log)
        tree_id_A = match.group(1)

        # extract git tree id of <tag> from git reference
        repo = pygit2.Repository(dataset_path)
        tree_id_B = git_tag_tree(repo, tag)
    except AttributeError:
        # Deliberate control flow: a failed regex match or missing
        # attribute anywhere above means <tag> is not the latest export.
        return False
    # if the remote uuids and tree ids exist and match, then
    # <tag> is the latest export to <remote>
    return remote_id_A == remote_id_B and tree_id_A == tree_id_B
def test_write_new_changes(datalad_store, new_dataset):
    ds_id = os.path.basename(new_dataset.path)
    write_new_changes(new_dataset, '1.0.1', ['Some changes'], '2019-01-01')
    # Manually make the commit without validation
    new_dataset.save('CHANGES')
    # Get a fresh dataset object and verify correct CHANGES
    dataset = Dataset(os.path.join(datalad_store.annex_path, ds_id))
    assert not dataset.repo.dirty
    assert git_show(dataset.path, 'HEAD:CHANGES') == '''1.0.1 2019-01-01
Beispiel #6
0
 def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
     """Serve one file from a dataset, or a JSON index of its files.

     Annex-pointer files (content starting with a '.git/annex' path) are
     resolved to the local annex object and streamed; plain git files are
     returned inline from the tree at `snapshot`. Without `filename`,
     responds with {'files': [...]} for the snapshot.
     """
     ds_path = self.store.get_dataset_path(dataset)
     if filename:
         try:
             file_content = git_show(ds_path, snapshot, filename)
             # If the file begins with an annex path, return that path
             if file_content[0:4096].find('.git/annex') != -1:
                 # Resolve absolute path for annex target
                 target_path = os.path.join(ds_path,
                                            os.path.dirname(filename),
                                            file_content)
                 # Verify the annex path is within the dataset dir
                 if ds_path == os.path.commonpath((ds_path, target_path)):
                     fd = open(target_path, 'rb')
                     resp.stream = fd
                     resp.stream_len = os.fstat(fd.fileno()).st_size
                     resp.status = falcon.HTTP_OK
                 else:
                     resp.media = {'error': 'file not found in git tree'}
                     resp.status = falcon.HTTP_NOT_FOUND
             else:
                 resp.body = file_content
                 resp.status = falcon.HTTP_OK
         except KeyError:
             # File is not present in tree
             resp.media = {'error': 'file not found in git tree'}
             resp.status = falcon.HTTP_NOT_FOUND
         except IOError:
             # File is not kept locally
             resp.media = {'error': 'file not found'}
             resp.status = falcon.HTTP_NOT_FOUND
         except Exception:
             # Some unknown error. Narrowed from a bare `except:` so that
             # KeyboardInterrupt/SystemExit are not swallowed.
             resp.media = {
                 'error': 'an unknown error occurred accessing this file'
             }
             resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
             self.logger.exception(
                 'An unknown error processing file "{}"'.format(filename))
     else:
         # Request for index of files
         # Return a list of file objects
         # {name, path, size}
         try:
             files = get_files(self.store, dataset, snapshot)
             resp.media = {'files': files}
         except Exception:
             # Previously a silent bare `except:`; log so failures are
             # diagnosable, and keep the 500 status for the client.
             self.logger.exception(
                 'An unknown error listing files for "{}"'.format(dataset))
             resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
Beispiel #7
0
def test_git_show(new_dataset):
    """git_show returns the exact blob contents at HEAD for a tracked file."""
    expected = '{"BIDSVersion": "1.0.2", "License": "This is not a real dataset", "Name": "Test fixture new dataset"}'
    actual = git.git_show(new_dataset.path, 'HEAD', 'dataset_description.json')
    assert actual == expected
Beispiel #8
0
def get_head_changes(dataset_path):
    """Return the CHANGES file contents at HEAD, or None if absent."""
    try:
        changes = git_show(dataset_path, 'HEAD', 'CHANGES')
    except KeyError:
        # git_show raises KeyError when the path is not in the HEAD tree.
        return None
    return changes
Beispiel #9
0
def get_head_changes(ds):
    """Return the CHANGES file contents at HEAD, or None if absent."""
    try:
        changes = git_show(ds.path, 'HEAD:CHANGES')
    except CalledProcessError:
        # git_show fails (non-zero git exit) when CHANGES is not in HEAD.
        return None
    return changes