Exemple #1
0
    def on_post(self, req, resp, dataset, snapshot):
        """Commit a revision (snapshot) from the working tree.

        Accepts an optional JSON body with 'description_fields',
        'snapshot_changes' and 'skip_publishing' keys.
        """
        queue = dataset_queue(dataset)
        media = req.media
        description_fields = {}
        snapshot_changes = []
        if media is not None:
            # `or` guards against explicit nulls in the request body so the
            # snapshot tasks always receive a dict/list, never None.
            description_fields = media.get('description_fields') or {}
            snapshot_changes = media.get('snapshot_changes') or []

        # Check remote configuration in the background (reuse the
        # per-dataset queue computed above instead of recomputing it).
        monitor_remote_configs.s(
            self.store.annex_path, dataset,
            snapshot).set(queue=queue).apply_async()

        created = create_snapshot(self.store.annex_path, dataset, snapshot,
                                  description_fields, snapshot_changes)
        if not created.failed():
            resp.media = created.get()
            resp.status = falcon.HTTP_OK
            # Publish after response
            publish = publish_snapshot.s(self.store.annex_path, dataset,
                                         snapshot, req.cookies)
            skip_publishing = media is not None and media.get(
                'skip_publishing')
            # NOTE(review): preserves prior behavior — publishing happens
            # only when skip_publishing is falsy but not None (a body with
            # the key missing suppresses publishing); confirm intent.
            if not skip_publishing and skip_publishing is not None:
                publish.apply_async(queue=queue)
        else:
            resp.media = {'error': 'tag already exists'}
            resp.status = falcon.HTTP_CONFLICT
Exemple #2
0
def create_snapshot(annex_path, dataset, snapshot, description_fields,
                    snapshot_changes):
    """
    Create a new snapshot (git tag).

    Raises an exception if the tag already exists.
    """
    queue = dataset_queue(dataset)

    def sig(task, *task_args):
        # Immutable signature for `task`, routed to this dataset's queue.
        return task.signature(queue=queue, args=task_args, immutable=True)

    # Validate the tag name first, then update metadata, tag, and reload.
    workflow = sig(validate_snapshot_name, annex_path, dataset, snapshot)
    for step in (sig(update_description, annex_path, dataset,
                     description_fields),
                 sig(update_changes, annex_path, dataset, snapshot,
                     snapshot_changes),
                 sig(save_snapshot, annex_path, dataset, snapshot),
                 sig(get_snapshot, annex_path, dataset, snapshot)):
        workflow = workflow | step
    return workflow.apply_async()
Exemple #3
0
def create_snapshot(store, dataset, snapshot, description_fields,
                    snapshot_changes):
    """
    Create a new snapshot (git tag).

    Raises an exception if the tag already exists.
    """
    ds = store.get_dataset(dataset)
    # Guard clause: refuse to clobber an existing tag with the same name.
    if any(tag['name'] == snapshot for tag in ds.repo.get_tags()):
        raise Exception(
            'Tag "{}" already exists, name conflict'.format(snapshot))
    queue = dataset_queue(dataset)
    description_result = update_description.apply(
        queue=queue, args=(store.annex_path, dataset, description_fields))
    changes_result = update_changes.apply(
        queue=queue,
        args=(store.annex_path, dataset, snapshot, snapshot_changes))
    description_result.wait()
    changes_result.wait()
    # Only tag the dataset when both metadata updates succeeded.
    if not (description_result.failed() or changes_result.failed()):
        ds.save(version_tag=snapshot)
Exemple #4
0
 def on_put(self, req, resp, dataset, filename):
     """Put will only update existing files and automatically unlocks them."""
     queue = dataset_queue(dataset)
     if filename:
         ds_path = self.store.get_dataset_path(dataset)
         file_path = os.path.join(ds_path, filename)
         if os.path.exists(file_path):
             # NOTE(review): `ds` is unused here — confirm get_dataset has
             # no required side effect before removing it.
             ds = self.store.get_dataset(dataset)
             media_dict = {'updated': filename}
             # Record if this was done on behalf of a user
             name, email = get_user_info(req)
             if name and email:
                 media_dict['name'] = name
                 media_dict['email'] = email
             # Unlock the annexed file so the stream can overwrite it
             unlock = unlock_files.apply_async(
                 queue=queue,
                 args=(self.annex_path, dataset),
                 kwargs={'files': [filename]})
             unlock.wait()
             self._update_file(file_path, req.stream)
             commit = commit_files.apply_async(
                 queue=queue,
                 args=(self.annex_path, dataset),
                 kwargs={'files': [filename], 'name': name, 'email': email,
                         'cookies': req.cookies})
             commit.wait()
             if not commit.failed():
                 resp.media = media_dict
                 resp.status = falcon.HTTP_OK
             else:
                 # Previously an unconditional 200 followed this check,
                 # masking commit failures — report them explicitly.
                 resp.media = {'error': 'file update failed'}
                 resp.status = falcon.HTTP_500
         else:
             resp.media = {'error': 'no such file'}
             resp.status = falcon.HTTP_NOT_FOUND
     else:
         resp.media = {'error': 'filename is missing'}
         resp.status = falcon.HTTP_BAD_REQUEST
Exemple #5
0
def commit_files(store,
                 dataset,
                 files,
                 name=None,
                 email=None,
                 validate=True,
                 cookies=None):
    """
    Commit a list of files with the email and name provided.

    Returns the commit hash generated.
    """
    ds = store.get_dataset(dataset)
    with CommitInfo(ds, name, email):
        if not files:
            # If no list of paths, add all untracked files
            ds.add('.')
        else:
            for path in files:
                ds.add(path)
    ref = ds.repo.get_hexsha()
    if validate:
        # Run the validator but don't block on the request
        validate_dataset.s(dataset, ds.path, ref, cookies).apply_async(
            queue=dataset_queue(dataset))
    return ref
Exemple #6
0
    def on_post(self, req, resp, dataset):
        """
        Commit a draft change.

        This adds all files in the working tree.
        """
        if dataset:
            queue = dataset_queue(dataset)
            # Record if this was done on behalf of a user
            name, email = get_user_info(req)
            media_dict = {}
            if name and email:
                media_dict['name'] = name
                media_dict['email'] = email
            commit = commit_files.apply_async(queue=queue,
                                              args=(self.annex_path, dataset),
                                              kwargs={
                                                  'files': None,
                                                  'name': name,
                                                  'email': email,
                                                  'cookies': req.cookies
                                              })
            commit.wait()
            if not commit.failed():
                # Attach the commit hash to response
                media_dict['ref'] = commit.get()
                resp.media = media_dict
                resp.status = falcon.HTTP_OK
            else:
                # Previously a failed commit fell through with no response
                # set, producing an empty 200 — report the failure instead.
                resp.media = {'error': 'dataset commit failed'}
                resp.status = falcon.HTTP_500
        else:
            resp.media = {
                'error': 'Missing or malformed dataset parameter in request.'
            }
            resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
 def on_post(self, req, resp, dataset):
     """
     Commit a description change.

     Returns the updated dataset_description.
     """
     if dataset:
         try:
             description_fields = req.media.get('description_fields')
             # Reject missing/empty updates up front. The original set a
             # 422 but fell through and ran the update anyway; it also
             # crashed on any(None) when the key was absent.
             if not description_fields or not any(description_fields):
                 resp.media = {
                     'error': 'Missing description field updates.'
                 }
                 resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
                 return
             queue = dataset_queue(dataset)
             updated = update_description.apply_async(
                 queue=queue,
                 args=(self.store.annex_path, dataset, description_fields))
             updated.wait()
             if updated.failed():
                 resp.media = {'error': 'dataset update failed'}
                 resp.status = falcon.HTTP_500
             else:
                 dataset_description = updated.get()
                 resp.media = dataset_description
                 resp.status = falcon.HTTP_OK
         except Exception:
             # Narrowed from a bare except so SystemExit/KeyboardInterrupt
             # are not swallowed.
             resp.media = {
                 'error': 'Unexpected error in dataset_description update.'
             }
             resp.status = falcon.HTTP_500
     else:
         resp.media = {
             'error': 'Missing or malformed dataset parameter in request.'
         }
         resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
Exemple #8
0
    def on_delete(self, req, resp, dataset, filename):
        """Delete an existing file from a dataset"""
        queue = dataset_queue(dataset)
        if filename:
            ds_path = self.store.get_dataset_path(dataset)
            file_path = os.path.join(ds_path, filename)
            if os.path.exists(file_path):
                # NOTE(review): `ds` is unused — confirm get_dataset has no
                # required side effect before removing it.
                ds = self.store.get_dataset(dataset)
                media_dict = {'deleted': filename}
                # Record if this was done on behalf of a user
                name, email = get_user_info(req)
                if name and email:
                    media_dict['name'] = name
                    media_dict['email'] = email

                remove = remove_files.apply_async(
                    queue=queue,
                    args=(self.annex_path, dataset),
                    kwargs={'files': [filename], 'name': name,
                            'email': email})
                remove.wait()

                # Report failures instead of an unconditional 200,
                # consistent with the dataset delete handler.
                if remove.failed():
                    resp.media = {'error': 'file deletion failed'}
                    resp.status = falcon.HTTP_500
                else:
                    resp.media = media_dict
                    resp.status = falcon.HTTP_OK
            else:
                resp.media = {'error': 'no such file'}
                resp.status = falcon.HTTP_NOT_FOUND
        else:
            resp.media = {'error': 'filename is missing'}
            resp.status = falcon.HTTP_BAD_REQUEST
Exemple #9
0
def audit_datasets(store):
    """Pick one dataset at random and schedule a remote audit task for it."""
    candidates = os.listdir(store.annex_path)
    chosen = random.choice(candidates)
    # Randomize start time a bit to reduce risk of stampedes
    delay = random.randint(1, 30)
    audit_remotes.apply_async((store.annex_path, chosen),
                              queue=dataset_queue(chosen),
                              countdown=delay)
Exemple #10
0
 def on_post(self, req, resp, dataset):
     """Queue an async migrate_to_bucket task for this dataset."""
     # NOTE(review): this binding is unused — kept in case get_dataset
     # has a required side effect; confirm before removing.
     datalad = self.store.get_dataset(dataset)
     queue = dataset_queue(dataset)
     migration = migrate_to_bucket.s(self.store.annex_path,
                                     dataset,
                                     cookies=req.cookies)
     migration.apply_async(queue=queue)
     resp.media = {}
     resp.status = falcon.HTTP_OK
Exemple #11
0
def update_head(store, dataset, cookies=None):
    """Pass HEAD commit references back to OpenNeuro"""
    ds = store.get_dataset(dataset)
    ref = ds.repo.get_hexsha()
    # We may want to detect if we need to run validation here?
    validate_dataset.s(dataset, ds.path, ref).apply_async(
        queue=dataset_queue(dataset))
    response = requests.post(url=GRAPHQL_ENDPOINT,
                             json=draft_revision_mutation(dataset, ref),
                             cookies=cookies)
    # Surface any non-200 reply from the GraphQL endpoint to the caller.
    if response.status_code != 200:
        raise Exception(response.text)
Exemple #12
0
 def on_delete(self, req, resp, dataset):
     """Delete the given dataset via the worker queue."""
     queue = dataset_queue(dataset)
     result = delete_dataset.apply_async(queue=queue,
                                         args=(self.store.annex_path,
                                               dataset))
     result.wait()
     if result.failed():
         resp.media = {'error': 'dataset not found'}
         resp.status = falcon.HTTP_NOT_FOUND
     else:
         resp.media = {}
         resp.status = falcon.HTTP_OK
Exemple #13
0
 def on_get(self, req, resp, dataset):
     """
     Return draft state (other than files).
     """
     if dataset:
         queue = dataset_queue(dataset)
         # Maybe turn this into status?
         partial = is_dirty.apply_async(queue=queue,
                                        args=(self.annex_path, dataset))
         partial.wait()
         resp.media = {'partial': partial.get()}
         resp.status = falcon.HTTP_OK
     else:
         # Previously a missing dataset fell through with no response set;
         # respond with an explicit 422 like the other handlers.
         resp.media = {
             'error': 'Missing or malformed dataset parameter in request.'
         }
         resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
Exemple #14
0
def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()

    realm = get_dataset_realm(ds, siblings, realm)
    # Hoisted: both exports target the same per-dataset queue, so compute
    # it once instead of calling dataset_queue twice.
    queue = dataset_queue(dataset)

    # Create the sibling if it does not exist
    s3_sibling(ds, siblings)

    # Export to S3 and GitHub in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=queue)

    # Public publishes to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create Github sibling only if GitHub is enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=queue)
Exemple #15
0
    def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
        """Stream one file's contents, or list the files in a snapshot."""
        ds_path = self.store.get_dataset_path(dataset)
        if filename:
            try:
                ds = self.store.get_dataset(dataset)
                if ds.repo.is_under_annex([filename])[0]:
                    path = ds.repo.repo.git.show(snapshot + ':' + filename)
                    # remove leading relative folder paths
                    fd = path[path.find('.git/annex'):]

                    # if fd fails, that means the file is not present in the annex and we need to get it from s3
                    # so we send the client a 404 to indicate the file was not found locally.
                    fd = open(os.path.join(ds_path, fd), 'rb')
                    resp.stream = fd
                    resp.stream_len = os.fstat(fd.fileno()).st_size
                    resp.status = falcon.HTTP_OK
                else:
                    resp.body = ds.repo.repo.git.show(snapshot + ':' +
                                                      filename)
                    resp.status = falcon.HTTP_OK
            except git.exc.GitCommandError:
                # File is not present in tree
                resp.media = {'error': 'file not found in git tree'}
                resp.status = falcon.HTTP_NOT_FOUND
            except IOError:
                # File is not kept locally
                resp.media = {'error': 'file not found'}
                resp.status = falcon.HTTP_NOT_FOUND
            except Exception:
                # Some unknown error. Narrowed from a bare except so
                # SystemExit/KeyboardInterrupt are not swallowed.
                resp.media = {
                    'error': 'an unknown error occurred accessing this file'
                }
                resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
                self.logger.exception(
                    'An unknown error processing file "{}"'.format(filename))
        else:
            # Request for index of files
            # Return a list of file objects
            # {name, path, size}
            queue = dataset_queue(dataset)
            if "untracked" in req.params:
                files = get_untracked_files.apply_async(
                    queue=queue, args=(self.store.annex_path, dataset))
                resp.media = {'files': files.get()}
            else:
                files = get_files.apply_async(queue=queue,
                                              args=(self.store.annex_path,
                                                    dataset, snapshot))
                resp.media = {'files': files.get()}
Exemple #16
0
 def on_get(self, req, resp, dataset, snapshot=None):
     """Get the tree of files for a snapshot."""
     queue = dataset_queue(dataset)
     if snapshot:
         # NOTE(review): unused binding, preserved in case of side effects
         ds = self.store.get_dataset(dataset)
         file_listing = get_files.s(self.store.annex_path, dataset,
                                    branch=snapshot).apply_async(queue=queue)
         resp.media = self._get_snapshot(dataset, snapshot,
                                         file_listing.get())
         resp.status = falcon.HTTP_OK
     else:
         snapshot_tags = get_snapshots.s(self.store.annex_path,
                                         dataset).apply_async(queue=queue)
         # Index of all tags
         # NOTE(review): unused binding, preserved in case of side effects
         ds = self.store.get_dataset(dataset)
         resp.media = {'snapshots': snapshot_tags.get()}
         resp.status = falcon.HTTP_OK
 def on_post(self, req, resp, dataset, snapshot):
     """Commit a revision (snapshot) from the working tree."""
     queue = dataset_queue(dataset)
     snapshot_task = create_snapshot.si(
         self.store.annex_path, dataset, snapshot).set(queue=queue)
     created = snapshot_task.apply_async()
     created.wait()
     if created.failed():
         resp.media = {'error': 'tag already exists'}
         resp.status = falcon.HTTP_CONFLICT
     else:
         resp.media = get_snapshot.s(
             self.store.annex_path, dataset,
             snapshot).apply_async(queue=queue).get()
         resp.status = falcon.HTTP_OK
         # Publish after response
         publish_snapshot.s(self.store.annex_path, dataset, snapshot,
                            req.cookies).apply_async(queue=queue)
Exemple #18
0
    def on_post(self, req, resp, dataset):
        """Create a new dataset, failing with 409 if it already exists."""
        ds_path = self.store.get_dataset_path(dataset)
        if not os.path.isdir(ds_path):
            queue = dataset_queue(dataset)
            # Record if this was done on behalf of a user
            name, email = get_user_info(req)

            result = create_dataset.apply_async(queue=queue,
                                                args=(self.store.annex_path,
                                                      dataset, name, email))
            result.wait()
            if result.failed():
                resp.media = {'error': 'dataset creation failed'}
                resp.status = falcon.HTTP_500
            else:
                resp.media = {}
                resp.status = falcon.HTTP_OK
        else:
            resp.media = {'error': 'dataset already exists'}
            resp.status = falcon.HTTP_CONFLICT
Exemple #19
0
    def on_post(self, req, resp, dataset, filename):
        """Post will create new files and adds them to the annex if they do not exist, else update existing files."""
        queue = dataset_queue(dataset)
        if not filename:
            # Guard clause: a filename is required for both branches below.
            resp.media = {'error': 'filename is missing'}
            resp.status = falcon.HTTP_BAD_REQUEST
            return
        ds_path = self.store.get_dataset_path(dataset)
        file_path = os.path.join(ds_path, filename)
        if os.path.exists(file_path):
            # NOTE(review): unused binding, preserved in case of side effects
            ds = self.store.get_dataset(dataset)
            media_dict = {'updated': filename}
            # Record if this was done on behalf of a user
            name, email = get_user_info(req)
            if name and email:
                media_dict['name'] = name
                media_dict['email'] = email
            unlock = unlock_files.apply_async(
                queue=queue,
                args=(self.annex_path, dataset),
                kwargs={'files': [filename]})
            unlock.wait()
            self._update_file(file_path, req.stream)
            resp.media = media_dict
            resp.status = falcon.HTTP_OK
        else:
            try:
                # Make any missing parent directories
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                # Begin writing stream to disk
                self._update_file(file_path, req.stream)
                # Add to dataset
                # NOTE(review): unused binding, preserved in case of side effects
                ds = self.store.get_dataset(dataset)
                resp.media = {'created': filename}
                resp.status = falcon.HTTP_OK
            except PermissionError:
                resp.media = {'error': 'file already exists'}
                resp.status = falcon.HTTP_CONFLICT
Exemple #20
0
 def schedule_celery_tasks(sender, **kwargs):
     """Run all periodic tasks."""
     # Audit a random dataset every 15 minutes on the 'publish' queue.
     fifteen_minutes = 60 * 15
     sender.add_periodic_task(fifteen_minutes,
                              audit_datasets.s(annex_path),
                              queue=dataset_queue('publish'))