Example 1
def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()

    # If no realm was given, pick the best export target
    if realm is None:
        # If the dataset has a public S3 sibling, export there;
        # otherwise fall back to the private realm
        public_bucket_name = DatasetRealm(DatasetRealm.PUBLIC).s3_remote
        has_public_bucket = get_sibling_by_name(public_bucket_name, siblings)
        if has_public_bucket:
            realm = DatasetRealm(DatasetRealm.PUBLIC)
        else:
            realm = DatasetRealm(DatasetRealm.PRIVATE)
    else:
        realm = get_s3_realm(realm=realm)

    # Create the sibling if it does not exist
    s3_sibling(ds, siblings)

    # Export to S3 and GitHub in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=publish_queue())

    # Only the public realm is published to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create the GitHub sibling only when GitHub exports are enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=publish_queue())
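Neither variant of publish_snapshot defines DatasetRealm; the code only assumes it exposes s3_remote, s3_bucket, and github_remote, and that DatasetRealm(DatasetRealm.PUBLIC) yields the public member. A minimal sketch of a type with that shape, using placeholder remote and bucket names (all values here are illustrative, not the service's real configuration):

from enum import Enum

class DatasetRealm(Enum):
    PRIVATE = 1
    PUBLIC = 2

    @property
    def s3_remote(self):
        # Placeholder remote names; the real mapping lives in service config
        return 'private-s3' if self is DatasetRealm.PRIVATE else 'public-s3'

    @property
    def s3_bucket(self):
        return 'dataset-private' if self is DatasetRealm.PRIVATE else 'dataset-public'

    @property
    def github_remote(self):
        return 'github'

Because Enum(member) returns the member itself, DatasetRealm(DatasetRealm.PUBLIC) in the code above resolves to DatasetRealm.PUBLIC.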
Example 2
def audit_datasets(store):
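    """Pick one dataset at random and queue an audit of its remotes."""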
    dataset_dirs = os.listdir(store.annex_path)
    dataset = random.choice(dataset_dirs)
    # Randomize start time a bit to reduce risk of stampedes
    countdown = random.randint(1, 30)
    audit_remotes.apply_async(
        (store.annex_path, dataset), queue=publish_queue(), countdown=countdown)
Example 3
    def on_post(self, req, resp, dataset, snapshot):
        """Commit a revision (snapshot) from the working tree."""
        queue = dataset_queue(dataset)
        media = req.media
        description_fields = {}
        snapshot_changes = []
        if media is not None:
            description_fields = media.get('description_fields')
            snapshot_changes = media.get('snapshot_changes')
        
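        # Run the remote-config check on the publish queue and wait for it
        # before snapshotting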
        monitor_remote_configs.s(
            self.store.annex_path, dataset, snapshot).set(
                queue=publish_queue()).apply_async().get()

        create = create_snapshot.si(
            self.store.annex_path, dataset, snapshot,
            description_fields, snapshot_changes).set(queue=queue)
        created = create.apply_async()
        # Wait without raising so a failed snapshot can be reported as a conflict
        created.get(propagate=False)
        if not created.failed():
            resp.media = get_snapshot.s(
                self.store.annex_path, dataset, snapshot).apply_async(queue=queue).get()
            resp.status = falcon.HTTP_OK
            # Publish after response
            publish = publish_snapshot.s(
                self.store.annex_path, dataset, snapshot, req.cookies)
            # Publish unless the client explicitly opted out
            skip_publishing = media is not None and media.get('skip_publishing')
            if not skip_publishing:
                publish.apply_async(queue=queue)
        else:
            resp.media = {'error': 'tag already exists'}
            resp.status = falcon.HTTP_CONFLICT
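Note the queueing pattern here: the remote-config check runs on the shared publish queue and blocks the request until it finishes, snapshot creation runs on the per-dataset queue returned by dataset_queue (presumably keeping operations on one dataset ordered), and publishing is dispatched fire-and-forget after the response fields are set.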
Example 4
    def on_post(self, req, resp, dataset):
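        """Queue an asynchronous migrate_to_bucket run for this dataset."""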
        datalad = self.store.get_dataset(dataset)
        queue = publish_queue()
        publish = migrate_to_bucket.s(self.store.annex_path,
                                      dataset,
                                      cookies=req.cookies)
        publish.apply_async(queue=queue)
        resp.media = {}
        resp.status = falcon.HTTP_OK
Example 5
def publish_snapshot(store, dataset, snapshot, cookies=None, realm=None):
    """Publish a snapshot tag to S3, GitHub or both."""
    ds = store.get_dataset(dataset)
    siblings = ds.siblings()

    realm = get_dataset_realm(ds, siblings, realm)

    # Create the sibling if it does not exist
    s3_sibling(ds, siblings)

    # Export to S3 and GitHub in another worker
    publish_s3_async \
        .s(store.annex_path, dataset, snapshot,
           realm.s3_remote, realm.s3_bucket, cookies) \
        .apply_async(queue=publish_queue())

    # Only the public realm is published to GitHub
    if realm == DatasetRealm.PUBLIC and DATALAD_GITHUB_EXPORTS_ENABLED:
        # Create the GitHub sibling only when GitHub exports are enabled
        github_sibling(ds, dataset, siblings)
        publish_github_async \
            .s(store.annex_path, dataset, snapshot, realm.github_remote) \
            .apply_async(queue=publish_queue())
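Example 5 is a refactor of Example 1: the realm-selection branch has moved into a get_dataset_realm helper. Reconstructed from the inline logic in Example 1, that helper would look roughly like this (a sketch, not the verified implementation):

def get_dataset_realm(ds, siblings, realm=None):
    """Resolve the realm to publish to, preferring an existing public S3 sibling."""
    if realm is None:
        # Prefer PUBLIC when the dataset already has a public S3 sibling
        public_bucket_name = DatasetRealm(DatasetRealm.PUBLIC).s3_remote
        if get_sibling_by_name(public_bucket_name, siblings):
            return DatasetRealm(DatasetRealm.PUBLIC)
        return DatasetRealm(DatasetRealm.PRIVATE)
    return get_s3_realm(realm=realm)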
Example 6
def schedule_celery_tasks(sender, **kwargs):
    """Run all periodic tasks."""
    sender.add_periodic_task(
        60 * 15, audit_datasets.s(annex_path), queue=publish_queue())
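schedule_celery_tasks takes a sender and registers periodic work, which is the shape of a Celery beat setup hook. It is typically connected to the app's on_after_configure signal; a minimal wiring sketch, where the app instance and annex_path value are assumptions rather than something shown in the examples:

from celery import Celery

app = Celery('datalad_service')  # hypothetical app configuration
annex_path = '/annexed-datasets'  # hypothetical; supplied by service config in practice

@app.on_after_configure.connect
def schedule_celery_tasks(sender, **kwargs):
    """Run all periodic tasks."""
    # Audit one random dataset's remotes every 15 minutes (see Example 2)
    sender.add_periodic_task(
        60 * 15, audit_datasets.s(annex_path), queue=publish_queue())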