def on_post(self, req, resp, dataset, snapshot): """Commit a revision (snapshot) from the working tree.""" queue = dataset_queue(dataset) media = req.media description_fields = {} snapshot_changes = [] if media != None: description_fields = media.get('description_fields') snapshot_changes = media.get('snapshot_changes') monitor_remote_configs.s( self.store.annex_path, dataset, snapshot).set(queue=dataset_queue(dataset)).apply_async() created = create_snapshot(self.store.annex_path, dataset, snapshot, description_fields, snapshot_changes) if not created.failed(): resp.media = created.get() resp.status = falcon.HTTP_OK # Publish after response publish = publish_snapshot.s(self.store.annex_path, dataset, snapshot, req.cookies) skip_publishing = req.media != None and media.get( 'skip_publishing') if not skip_publishing and skip_publishing is not None: publish.apply_async(queue=queue) else: resp.media = {'error': 'tag already exists'} resp.status = falcon.HTTP_CONFLICT
def create_snapshot(annex_path, dataset, snapshot,
                    description_fields, snapshot_changes):
    """
    Create a new snapshot (git tag).

    Raises an exception if the tag already exists.
    """
    queue = dataset_queue(dataset)
    name_test = validate_snapshot_name.signature(
        queue=queue, args=(annex_path, dataset, snapshot), immutable=True)
    updated_description = update_description.signature(
        queue=queue, args=(annex_path, dataset, description_fields),
        immutable=True)
    updated_changes = update_changes.signature(
        queue=queue, args=(annex_path, dataset, snapshot, snapshot_changes),
        immutable=True)
    snapshot_saved = save_snapshot.signature(
        queue=queue, args=(annex_path, dataset, snapshot), immutable=True)
    load_new_snapshot = get_snapshot.signature(
        queue=queue, args=(annex_path, dataset, snapshot), immutable=True)
    chain = (name_test | updated_description | updated_changes
             | snapshot_saved | load_new_snapshot)
    return chain.apply_async()
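# Usage sketch (illustrative only, assuming a configured Celery app and
# a running worker): a chain's AsyncResult resolves to the return value
# of its final task, here the freshly loaded snapshot from get_snapshot.
# The annex path, dataset id, and tag below are made-up examples.
created = create_snapshot('/annex', 'ds000001', '1.0.0',
                          description_fields={}, snapshot_changes=[])
snapshot_data = created.get(timeout=60)  # blocks until the chain finishes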
def create_snapshot(store, dataset, snapshot,
                    description_fields, snapshot_changes):
    """
    Create a new snapshot (git tag).

    Raises an exception if the tag already exists.
    """
    ds = store.get_dataset(dataset)
    # Search for any existing tags
    tagged = [tag for tag in ds.repo.get_tags() if tag['name'] == snapshot]
    if not tagged:
        queue = dataset_queue(dataset)
        updated_description = update_description.apply(
            queue=queue,
            args=(store.annex_path, dataset, description_fields))
        updated_changes = update_changes.apply(
            queue=queue,
            args=(store.annex_path, dataset, snapshot, snapshot_changes))
        updated_description.wait()
        updated_changes.wait()
        if not updated_description.failed() and not updated_changes.failed():
            ds.save(version_tag=snapshot)
    else:
        raise Exception(
            'Tag "{}" already exists, name conflict'.format(snapshot))
def on_put(self, req, resp, dataset, filename):
    """Put will only update existing files and automatically unlocks them."""
    queue = dataset_queue(dataset)
    if filename:
        ds_path = self.store.get_dataset_path(dataset)
        file_path = os.path.join(ds_path, filename)
        if os.path.exists(file_path):
            ds = self.store.get_dataset(dataset)
            media_dict = {'updated': filename}
            # Record if this was done on behalf of a user
            name, email = get_user_info(req)
            if name and email:
                media_dict['name'] = name
                media_dict['email'] = email
            unlock = unlock_files.apply_async(
                queue=queue, args=(self.annex_path, dataset),
                kwargs={'files': [filename]})
            unlock.wait()
            self._update_file(file_path, req.stream)
            commit = commit_files.apply_async(
                queue=queue, args=(self.annex_path, dataset),
                kwargs={'files': [filename], 'name': name,
                        'email': email, 'cookies': req.cookies})
            commit.wait()
            # ds.publish(to='github')
            if not commit.failed():
                resp.media = media_dict
                resp.status = falcon.HTTP_OK
        else:
            resp.media = {'error': 'no such file'}
            resp.status = falcon.HTTP_NOT_FOUND
    else:
        resp.media = {'error': 'filename is missing'}
        resp.status = falcon.HTTP_BAD_REQUEST
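# The file handlers above assume an _update_file helper. A minimal
# sketch of what it plausibly does (the chunk size is an arbitrary
# choice here): stream the request body to disk in fixed-size chunks so
# large uploads are never buffered entirely in memory.
def _update_file(self, file_path, stream, chunk_size=65536):
    with open(file_path, 'wb') as f:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)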
def commit_files(store, dataset, files, name=None, email=None,
                 validate=True, cookies=None):
    """
    Commit a list of files with the email and name provided.

    Returns the commit hash generated.
    """
    ds = store.get_dataset(dataset)
    with CommitInfo(ds, name, email):
        if files:
            for filename in files:
                ds.add(filename)
        else:
            # If no list of paths, add all untracked files
            ds.add('.')
    ref = ds.repo.get_hexsha()
    if validate:
        # Run the validator but don't block on the request
        queue = dataset_queue(dataset)
        validate_dataset.s(dataset, ds.path, ref,
                           cookies).apply_async(queue=queue)
    return ref
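# commit_files relies on a CommitInfo context manager. A hypothetical
# sketch of its behavior (an assumption, not the confirmed helper):
# temporarily export the acting user's identity through git's
# environment variables so the commit is attributed to them, then
# restore the previous values on exit.
import os
from contextlib import contextmanager

@contextmanager
def commit_info(ds, name=None, email=None):
    keys = ('GIT_AUTHOR_NAME', 'GIT_AUTHOR_EMAIL',
            'GIT_COMMITTER_NAME', 'GIT_COMMITTER_EMAIL')
    saved = {key: os.environ.get(key) for key in keys}
    if name and email:
        os.environ.update({
            'GIT_AUTHOR_NAME': name, 'GIT_AUTHOR_EMAIL': email,
            'GIT_COMMITTER_NAME': name, 'GIT_COMMITTER_EMAIL': email})
    try:
        yield ds
    finally:
        for key, value in saved.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value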
def on_post(self, req, resp, dataset): """ Commmit a draft change. This adds all files in the working tree. """ if dataset: queue = dataset_queue(dataset) # Record if this was done on behalf of a user name, email = get_user_info(req) media_dict = {} if name and email: media_dict['name'] = name media_dict['email'] = email commit = commit_files.apply_async(queue=queue, args=(self.annex_path, dataset), kwargs={ 'files': None, 'name': name, 'email': email, 'cookies': req.cookies }) commit.wait() if not commit.failed(): # Attach the commit hash to response media_dict['ref'] = commit.get() resp.media = media_dict resp.status = falcon.HTTP_OK else: resp.media = { 'error': 'Missing or malformed dataset parameter in request.' } resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
def on_post(self, req, resp, dataset): """ Commmit a description change. Returns update dataset_description """ if dataset: try: description_fields = req.media.get('description_fields') if not any(description_fields): resp.media = { 'error': 'Missing description field updates.' } resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY queue = dataset_queue(dataset) updated = update_description.apply_async( queue=queue, args=(self.store.annex_path, dataset, description_fields)) updated.wait() if updated.failed(): resp.media = {'error': 'dataset update failed'} resp.status = falcon.HTTP_500 else: dataset_description = updated.get() resp.media = dataset_description resp.status = falcon.HTTP_OK except: resp.media = { 'error': 'Unexpected error in dataset_description update.' } resp.status = falcon.HTTP_500 else: resp.media = { 'error': 'Missing or malformed dataset parameter in request.' } resp.status = falcon.HTTP_UNPROCESSABLE_ENTITY
def on_delete(self, req, resp, dataset, filename):
    """Delete an existing file from a dataset."""
    queue = dataset_queue(dataset)
    if filename:
        ds_path = self.store.get_dataset_path(dataset)
        file_path = os.path.join(ds_path, filename)
        if os.path.exists(file_path):
            ds = self.store.get_dataset(dataset)
            media_dict = {'deleted': filename}
            name, email = get_user_info(req)
            if name and email:
                media_dict['name'] = name
                media_dict['email'] = email
            # unlock = unlock_files.apply_async(
            #     queue=queue, args=(self.annex_path, dataset),
            #     kwargs={'files': [filename]})
            # unlock.wait()
            remove = remove_files.apply_async(
                queue=queue, args=(self.annex_path, dataset),
                kwargs={'files': [filename], 'name': name, 'email': email})
            remove.wait()
            resp.media = media_dict
            resp.status = falcon.HTTP_OK
        else:
            resp.media = {'error': 'no such file'}
            resp.status = falcon.HTTP_NOT_FOUND
    else:
        resp.media = {'error': 'filename is missing'}
        resp.status = falcon.HTTP_BAD_REQUEST
def audit_datasets(store):
    dataset_dirs = os.listdir(store.annex_path)
    dataset = random.choice(dataset_dirs)
    # Randomize start time a bit to reduce risk of stampedes
    countdown = random.randint(1, 30)
    audit_remotes.apply_async((store.annex_path, dataset),
                              queue=dataset_queue(dataset),
                              countdown=countdown)
def on_post(self, req, resp, dataset):
    datalad = self.store.get_dataset(dataset)
    queue = dataset_queue(dataset)
    publish = migrate_to_bucket.s(self.store.annex_path, dataset,
                                  cookies=req.cookies)
    publish.apply_async(queue=queue)
    resp.media = {}
    resp.status = falcon.HTTP_OK
def update_head(store, dataset, cookies=None):
    """Pass HEAD commit references back to OpenNeuro."""
    ds = store.get_dataset(dataset)
    ref = ds.repo.get_hexsha()
    # We may want to detect if we need to run validation here?
    queue = dataset_queue(dataset)
    validate_dataset.s(dataset, ds.path, ref).apply_async(queue=queue)
    r = requests.post(url=GRAPHQL_ENDPOINT,
                      json=draft_revision_mutation(dataset, ref),
                      cookies=cookies)
    if r.status_code != 200:
        raise Exception(r.text)
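# update_head posts a GraphQL payload built by draft_revision_mutation.
# A hypothetical sketch of that payload's shape (the mutation, field,
# and argument names are assumptions, not the confirmed OpenNeuro
# schema): it ties the dataset id to the new draft HEAD ref.
def draft_revision_mutation(dataset_id, ref):
    return {
        'query': 'mutation ($datasetId: ID!, $ref: String!) '
                 '{ updateRef(datasetId: $datasetId, ref: $ref) }',
        'variables': {'datasetId': dataset_id, 'ref': ref},
    }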
def on_delete(self, req, resp, dataset):
    queue = dataset_queue(dataset)
    deleted = delete_dataset.apply_async(
        queue=queue, args=(self.store.annex_path, dataset))
    deleted.wait()
    if deleted.failed():
        resp.media = {'error': 'dataset not found'}
        resp.status = falcon.HTTP_NOT_FOUND
    else:
        resp.media = {}
        resp.status = falcon.HTTP_OK
def on_get(self, req, resp, dataset): """ Return draft state (other than files). """ if dataset: queue = dataset_queue(dataset) # Maybe turn this into status? partial = is_dirty.apply_async(queue=queue, args=(self.annex_path, dataset)) partial.wait() resp.media = {'partial': partial.get()} resp.status = falcon.HTTP_OK
def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()
    realm = get_dataset_realm(ds, siblings, realm)
    # Create the sibling if it does not exist
    s3_sibling(ds, siblings)
    # Export to S3 and GitHub in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=dataset_queue(dataset))
    # Public publishes to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create the GitHub sibling only if GitHub is enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=dataset_queue(dataset))
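# publish_snapshot reads sibling and bucket names off the realm. A
# hypothetical sketch of the DatasetRealm enum it implies (the remote
# and bucket names below are placeholders, not real configuration):
from enum import Enum

class DatasetRealm(Enum):
    PRIVATE = ('private-s3', 'private-bucket', None)
    PUBLIC = ('public-s3', 'public-bucket', 'github')

    def __init__(self, s3_remote, s3_bucket, github_remote):
        self.s3_remote = s3_remote
        self.s3_bucket = s3_bucket
        self.github_remote = github_remote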
def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
    ds_path = self.store.get_dataset_path(dataset)
    if filename:
        try:
            ds = self.store.get_dataset(dataset)
            if ds.repo.is_under_annex([filename])[0]:
                path = ds.repo.repo.git.show(snapshot + ':' + filename)
                # Remove leading relative folder paths
                fd = path[path.find('.git/annex'):]
                # If open() fails, the file is not present in the annex
                # and must be fetched from S3, so we send the client a
                # 404 to indicate the file was not found locally.
                fd = open(os.path.join(ds_path, fd), 'rb')
                resp.stream = fd
                resp.stream_len = os.fstat(fd.fileno()).st_size
                resp.status = falcon.HTTP_OK
            else:
                resp.body = ds.repo.repo.git.show(snapshot + ':' + filename)
                resp.status = falcon.HTTP_OK
        except git.exc.GitCommandError:
            # File is not present in tree
            resp.media = {'error': 'file not found in git tree'}
            resp.status = falcon.HTTP_NOT_FOUND
        except IOError:
            # File is not kept locally
            resp.media = {'error': 'file not found'}
            resp.status = falcon.HTTP_NOT_FOUND
        except Exception:
            # Some unknown error
            resp.media = {
                'error': 'an unknown error occurred accessing this file'
            }
            resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
            self.logger.exception(
                'An unknown error occurred processing file "{}"'.format(
                    filename))
    else:
        # Request for the index of files
        # Return a list of file objects: {name, path, size}
        queue = dataset_queue(dataset)
        if 'untracked' in req.params:
            files = get_untracked_files.apply_async(
                queue=queue, args=(self.store.annex_path, dataset))
            resp.media = {'files': files.get()}
        else:
            files = get_files.apply_async(
                queue=queue,
                args=(self.store.annex_path, dataset, snapshot))
            resp.media = {'files': files.get()}
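# What the annexed branch above relies on: for an annexed file,
# `git show <rev>:<path>` prints the symlink target rather than file
# content. Illustrative sketch (repo path, filename, and key are made
# up) of how the slice recovers a path that opens relative to ds_path:
import git  # GitPython, as used by the handler above

repo = git.Repo('/annex/ds000001')
target = repo.git.show('HEAD:sub-01/anat/sub-01_T1w.nii.gz')
# target looks like '../../.git/annex/objects/xx/yy/MD5E-s1234--abcd...'
annex_rel = target[target.find('.git/annex'):]  # drops the '../' prefix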
def on_get(self, req, resp, dataset, snapshot=None):
    """Get the tree of files for a snapshot."""
    queue = dataset_queue(dataset)
    if snapshot:
        ds = self.store.get_dataset(dataset)
        files = get_files.s(self.store.annex_path, dataset,
                            branch=snapshot).apply_async(queue=queue)
        resp.media = self._get_snapshot(dataset, snapshot, files.get())
        resp.status = falcon.HTTP_OK
    else:
        # Index of all tags
        tags = get_snapshots.s(self.store.annex_path,
                               dataset).apply_async(queue=queue)
        ds = self.store.get_dataset(dataset)
        resp.media = {'snapshots': tags.get()}
        resp.status = falcon.HTTP_OK
def on_post(self, req, resp, dataset, snapshot): """Commit a revision (snapshot) from the working tree.""" queue = dataset_queue(dataset) create = create_snapshot.si( self.store.annex_path, dataset, snapshot).set(queue=queue) created = create.apply_async() created.wait() if not created.failed(): resp.media = get_snapshot.s( self.store.annex_path, dataset, snapshot).apply_async(queue=queue).get() resp.status = falcon.HTTP_OK # Publish after response publish = publish_snapshot.s( self.store.annex_path, dataset, snapshot, req.cookies) publish.apply_async(queue=queue) else: resp.media = {'error': 'tag already exists'} resp.status = falcon.HTTP_CONFLICT
def on_post(self, req, resp, dataset):
    ds_path = self.store.get_dataset_path(dataset)
    if os.path.isdir(ds_path):
        resp.media = {'error': 'dataset already exists'}
        resp.status = falcon.HTTP_CONFLICT
    else:
        queue = dataset_queue(dataset)
        # Record if this was done on behalf of a user
        name, email = get_user_info(req)
        created = create_dataset.apply_async(
            queue=queue,
            args=(self.store.annex_path, dataset, name, email))
        created.wait()
        if created.failed():
            resp.media = {'error': 'dataset creation failed'}
            resp.status = falcon.HTTP_500
        else:
            resp.media = {}
            resp.status = falcon.HTTP_OK
def on_post(self, req, resp, dataset, filename):
    """Post will create new files and add them to the annex if they do
    not exist, else update existing files."""
    queue = dataset_queue(dataset)
    if filename:
        ds_path = self.store.get_dataset_path(dataset)
        file_path = os.path.join(ds_path, filename)
        if os.path.exists(file_path):
            ds = self.store.get_dataset(dataset)
            media_dict = {'updated': filename}
            # Record if this was done on behalf of a user
            name, email = get_user_info(req)
            if name and email:
                media_dict['name'] = name
                media_dict['email'] = email
            unlock = unlock_files.apply_async(
                queue=queue, args=(self.annex_path, dataset),
                kwargs={'files': [filename]})
            unlock.wait()
            self._update_file(file_path, req.stream)
            # ds.publish(to='github')
            resp.media = media_dict
            resp.status = falcon.HTTP_OK
        else:
            try:
                # Make any missing parent directories
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                # Begin writing stream to disk
                self._update_file(file_path, req.stream)
                # Add to dataset
                ds = self.store.get_dataset(dataset)
                media_dict = {'created': filename}
                resp.media = media_dict
                resp.status = falcon.HTTP_OK
            except PermissionError:
                resp.media = {'error': 'file already exists'}
                resp.status = falcon.HTTP_CONFLICT
    else:
        resp.media = {'error': 'filename is missing'}
        resp.status = falcon.HTTP_BAD_REQUEST
def schedule_celery_tasks(sender, **kwargs):
    """Run all periodic tasks."""
    sender.add_periodic_task(60 * 15, audit_datasets.s(annex_path),
                             queue=dataset_queue('publish'))
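# Every snippet above routes work through dataset_queue. A minimal
# sketch of the assumed helper: derive a stable per-dataset Celery queue
# name so all tasks for one dataset run serially on the same queue (the
# exact naming scheme here is a guess, not the confirmed implementation).
def dataset_queue(dataset):
    return 'dataset-{}'.format(dataset)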