def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub, or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()

    # If the realm parameter is not included, find the best target
    if realm is None:
        # If the dataset has a public sibling, use it as the export target;
        # otherwise, use the private sibling as the export target
        public_bucket_name = DatasetRealm(DatasetRealm.PUBLIC).s3_remote
        has_public_bucket = get_sibling_by_name(public_bucket_name, siblings)
        if has_public_bucket:
            realm = DatasetRealm(DatasetRealm.PUBLIC)
        else:
            realm = DatasetRealm(DatasetRealm.PRIVATE)
    else:
        realm = get_s3_realm(realm=realm)

    # Create the S3 sibling if it does not exist
    s3_sibling(ds, siblings)

    # Export to S3 in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=publish_queue())

    # Public datasets are also published to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create the GitHub sibling only if GitHub exports are enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=publish_queue())
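# A minimal sketch of the get_sibling_by_name helper used above, assuming
# ds.siblings() yields result dicts with a 'name' key as DataLad's siblings()
# call does. This is an illustration, not the project's actual implementation.
def get_sibling_by_name(name, siblings):
    """Return the sibling with the given name, or None if it is absent."""
    return next(
        (sibling for sibling in siblings if sibling.get('name') == name),
        None)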
def audit_datasets(annex_path):
    """Pick one random dataset and audit its remotes in a worker."""
    dataset_dirs = os.listdir(annex_path)
    dataset = random.choice(dataset_dirs)
    # Randomize the start time a bit to reduce the risk of stampedes
    countdown = random.randint(1, 30)
    audit_remotes.apply_async(
        (annex_path, dataset),
        queue=publish_queue(),
        countdown=countdown)
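# The queue helpers are defined elsewhere in this service. A plausible sketch,
# assuming one shared queue for publish/audit work and one queue per dataset
# to serialize snapshot operations; the exact names are assumptions:
def publish_queue():
    """Name of the shared queue for publish and audit tasks (assumed)."""
    return 'publish'


def dataset_queue(dataset):
    """Name of the per-dataset queue used to serialize work (assumed)."""
    return 'dataset-{}'.format(dataset)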
def on_post(self, req, resp, dataset, snapshot):
    """Commit a revision (snapshot) from the working tree."""
    queue = dataset_queue(dataset)
    media = req.media
    description_fields = {}
    snapshot_changes = []
    if media is not None:
        description_fields = media.get('description_fields', {})
        snapshot_changes = media.get('snapshot_changes', [])
    monitor_remote_configs.s(
        self.store.annex_path, dataset, snapshot).set(
        queue=publish_queue()).apply_async().get()
    create = create_snapshot.si(
        self.store.annex_path, dataset, snapshot,
        description_fields, snapshot_changes).set(queue=queue)
    created = create.apply_async()
    created.wait()
    if not created.failed():
        resp.media = get_snapshot.s(
            self.store.annex_path, dataset,
            snapshot).apply_async(queue=queue).get()
        resp.status = falcon.HTTP_OK
        # Publish after the response, unless the client opted out
        publish = publish_snapshot.s(
            self.store.annex_path, dataset, snapshot, req.cookies)
        skip_publishing = media is not None and media.get('skip_publishing')
        if not skip_publishing:
            publish.apply_async(queue=queue)
    else:
        resp.media = {'error': 'tag already exists'}
        resp.status = falcon.HTTP_CONFLICT
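# Example client call for the handler above. The host, port, and route
# template are placeholders assumed for illustration; the request body fields
# (description_fields, snapshot_changes, skip_publishing) come from the code.
import requests

response = requests.post(
    'http://localhost:9877/datasets/ds000001/snapshots/1.0.0',
    json={
        'description_fields': {'Name': 'Example dataset'},
        'snapshot_changes': ['Initial snapshot'],
        'skip_publishing': False,
    })
assert response.status_code in (200, 409)  # 409 if the tag already exists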
def on_post(self, req, resp, dataset):
    """Queue a migration of this dataset to its S3 bucket."""
    datalad = self.store.get_dataset(dataset)
    queue = publish_queue()
    publish = migrate_to_bucket.s(
        self.store.annex_path, dataset, cookies=req.cookies)
    publish.apply_async(queue=queue)
    resp.media = {}
    resp.status = falcon.HTTP_OK
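# Sketch of how these handlers might be mounted in a Falcon app. The resource
# class names, route templates, and the store argument are assumptions made
# for illustration; only the dataset/snapshot URI parameters come from the
# handler signatures above.
import falcon


def create_app(store):
    app = falcon.App()
    app.add_route(
        '/datasets/{dataset}/snapshots/{snapshot}', SnapshotResource(store))
    app.add_route(
        '/datasets/{dataset}/publish', PublishResource(store))
    return app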
def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub, or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()
    realm = get_dataset_realm(ds, siblings, realm)

    # Create the S3 sibling if it does not exist
    s3_sibling(ds, siblings)

    # Export to S3 in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=publish_queue())

    # Public datasets are also published to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create the GitHub sibling only if GitHub exports are enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=publish_queue())
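# get_dataset_realm factors out the realm-selection logic that the earlier
# version of publish_snapshot inlined; a sketch reconstructed from that logic
# (the ds parameter is kept to match the call site, though this version does
# not use it):
def get_dataset_realm(ds, siblings, realm=None):
    """Pick the publish target: public if a public sibling exists."""
    if realm is None:
        public_bucket_name = DatasetRealm(DatasetRealm.PUBLIC).s3_remote
        if get_sibling_by_name(public_bucket_name, siblings):
            return DatasetRealm(DatasetRealm.PUBLIC)
        return DatasetRealm(DatasetRealm.PRIVATE)
    return get_s3_realm(realm=realm)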
def schedule_celery_tasks(sender, **kwargs):
    """Register all periodic tasks."""
    # Audit one random dataset's remotes every 15 minutes
    sender.add_periodic_task(
        60 * 15,
        audit_datasets.s(annex_path),
        queue=publish_queue())
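# A sketch of the wiring for the scheduler above, assuming the Celery app
# object is named app; Celery's on_after_configure signal fires once the app
# is configured, which is the usual point to register periodic tasks.
from celery import Celery

app = Celery('datalad_service')
app.on_after_configure.connect(schedule_celery_tasks)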