Esempio n. 1
0
def main():
    """Cross-check a Dandiset's asset listing between Girder and the DANDI API.

    Lists the folders of the hardcoded Girder "drafts" collection, skips
    every Dandiset except ``000026``, and compares three views of its
    assets, each reduced to a set of ``(path, size)`` pairs:

    * the listing returned by ``GirderCli.get_dandiset_and_assets``,
    * an independent listing from ``adhoc_list_girder``,
    * the "draft" version listing from the DANDI API.

    On any mismatch a message is printed and the ``pdb`` debugger is
    entered; otherwise the number of matching assets is printed.
    """
    g_client = GirderCli("http://3.19.164.171")
    a_client = DandiAPIClient("https://api.dandiarchive.org/api")

    def path_and_size(assets):
        # Keep only path (leading slashes stripped) and size; anything else
        # would require querying each asset for its metadata.
        return {(asset["path"].lstrip("/"), asset["size"]) for asset in assets}

    with a_client.session():
        g_client.dandi_authenticate()
        # The _id of the "drafts" collection is hardcoded here.
        draft_folders = list(
            g_client.listFolder("5e59bb0af19e820ab6ea6c62", "collection")
        )
        names_and_ids = [
            (folder["name"], folder["_id"]) for folder in draft_folders
        ]
        for dandiset, girder_id in names_and_ids:
            if dandiset != "000026":
                continue
            print(f"DANDI:{dandiset}", end="\t")

            _, girder_asset_iter = g_client.get_dandiset_and_assets(
                girder_id, "folder"
            )
            girder_pairs = path_and_size(list(girder_asset_iter))

            # Second, independent listing of the same Girder folder: two
            # identical bugs in two separate code paths are less likely.
            adhoc_pairs = set(adhoc_list_girder(girder_id, g_client))

            if girder_pairs != adhoc_pairs:
                print("ad-hoc and dandi listing of girder differs!")
                import pdb

                pdb.set_trace()

            _, api_asset_iter = a_client.get_dandiset_and_assets(
                dandiset, "draft"
            )
            api_assets = list(api_asset_iter)
            api_pairs = path_and_size(api_assets)

            if api_pairs == girder_pairs:
                print(f"{len(api_assets)} assets the same")
                continue

            print("differs")
            import pdb

            pdb.set_trace()
Esempio n. 2
0
def main(api_url, token, dandiset_path, delete_extant, only_metadata):
    """Create Dandisets on a DANDI API server, or just set their metadata.

    Parameters
    ----------
    api_url
        Passed to ``DandiAPIClient`` as ``api_url``.
    token
        Passed to ``DandiAPIClient`` as ``token``.
    dandiset_path
        Iterable of paths; each one is loaded with ``APIDandiset``.
    delete_extant
        When true, the draft version of each Dandiset is looked up first
        and, if the lookup succeeds, the Dandiset is deleted.
    only_metadata
        When true, only the metadata of the Dandiset is set; otherwise a
        new Dandiset is created from the local name and metadata.

    Raises
    ------
    requests.HTTPError
        Re-raised from the existence check when the response status is
        anything other than 404.
    """
    client = DandiAPIClient(api_url=api_url, token=token)
    with client.session():
        for dpath in dandiset_path:
            dandiset = APIDandiset(dpath)

            if delete_extant:
                already_exists = True
                try:
                    client.get_dandiset(dandiset.identifier, "draft")
                except requests.HTTPError as exc:
                    # A 404 just means there is nothing to delete.
                    if exc.response.status_code != 404:
                        raise
                    already_exists = False
                if already_exists:
                    print("Dandiset", dandiset.identifier, "already exists; deleting")
                    client.delete(f"/dandisets/{dandiset.identifier}/")

            if not only_metadata:
                print("Creating Dandiset", dandiset.identifier)
                dandiset_name = dandiset.metadata.get("name", "")
                client.create_dandiset(
                    name=dandiset_name, metadata=dandiset.metadata
                )
                continue

            print("Setting metadata for Dandiset", dandiset.identifier)
            client.set_dandiset_metadata(
                dandiset.identifier, metadata=dandiset.metadata
            )
Esempio n. 3
0
class URLUpdater:
    """Refresh the URLs registered with git-annex for Dandiset assets.

    For every annexed asset of a Dandiset's "draft" version, the versioned
    S3 bucket URL and the API download URL are added when missing, and any
    URL pointing into the old ``girder-assetstore`` location is removed.
    """

    def __init__(self, datasets_path: Path):
        """Remember the datasets directory and set up the API and S3 clients.

        ``datasets_path`` is the directory that holds one dataset per
        Dandiset, named after the Dandiset identifier.
        """
        self.datasets_path = datasets_path
        self.dandi_client = DandiAPIClient("https://api.dandiarchive.org/api")
        # The S3 client is configured to send unsigned requests.
        unsigned = Config(signature_version=UNSIGNED)
        self.s3client = boto3.client("s3", config=unsigned)

    def run(self, dandisets=()):
        """Update and push each requested Dandiset.

        ``dandisets`` is an iterable of Dandiset identifiers; when it is
        empty, every identifier reported by the API is processed instead.
        Each dataset is pushed to its "github" sibling after updating.
        """
        with self.dandi_client.session():
            dandiset_ids = dandisets or self.get_dandiset_ids()
            for did in dandiset_ids:
                dataset_dir = self.datasets_path / did
                log.info("Updating URLs for Dandiset %s", did)
                ds = Dataset(str(dataset_dir))
                self.update_dandiset_urls(did, ds)
                log.info("Pushing to sibling")
                ds.push(to="github")

    def update_dandiset_urls(self, dandiset_id, ds):
        """Bring the annex URLs of all draft assets in ``ds`` up to date.

        Raises ``RuntimeError`` when the repository is dirty.  Files that
        are not under git-annex are skipped.  The dataset is saved once,
        after all assets have been processed.
        """
        if ds.repo.dirty:
            raise RuntimeError(
                "Dirty repository; clean or save before running")
        ds.repo.always_commit = False

        legacy_marker = "dandiarchive.s3.amazonaws.com/girder-assetstore/"
        draft_assets = self.dandi_client.get_dandiset_assets(
            dandiset_id, "draft", include_metadata=False
        )
        for asset in draft_assets:
            path = asset["path"]
            log.info("Processing asset %s", path)

            if not ds.repo.is_under_annex(path, batch=True):
                log.info("File is not managed by git annex; not updating URLs")
                continue

            known_urls = set(ds.repo.get_urls(path, batch=True))
            asset_id = asset["asset_id"]
            bucket_url = self.get_file_bucket_url(
                dandiset_id, "draft", asset_id
            )
            download_url = (
                f"https://api.dandiarchive.org/api/dandisets/{dandiset_id}"
                f"/versions/draft/assets/{asset_id}/download/"
            )

            # Register whichever of the two current URLs is still missing.
            for wanted in (bucket_url, download_url):
                if wanted in known_urls:
                    continue
                log.info("Adding URL %s to asset", wanted)
                ds.repo.add_url_to_file(path, wanted, batch=True)

            # Drop URLs that point into the old girder-assetstore location.
            for known in known_urls:
                if legacy_marker in known:
                    log.info("Removing URL %s from asset", known)
                    ds.repo.rm_url(path, known)

        log.info("Commiting changes")
        ds.save(message="Ran use-new-urls.py")

    def get_dandiset_ids(self):
        """Yield the identifier of every Dandiset, following pagination.

        Starts at ``/dandisets/`` and keeps requesting the page named by
        the ``next`` field until that field is missing or falsy.
        """
        page = self.dandi_client.get("/dandisets/")
        while True:
            for entry in page["results"]:
                yield entry["identifier"]
            next_url = page.get("next")
            if not next_url:
                return
            page = self.dandi_client.get(next_url)

    def get_file_bucket_url(self, dandiset_id, version_id, asset_id):
        """Return the version-pinned bucket URL for an asset.

        Issues a HEAD request against the asset's download endpoint, takes
        the ``Location`` header of the response, looks the object up in the
        ``dandiarchive`` bucket, and returns the location with its query
        string replaced by ``versionId=<VersionId of the object>``.
        """
        endpoint = (
            f"/dandisets/{dandiset_id}/versions/{version_id}/assets/{asset_id}"
            "/download/"
        )
        response = self.dandi_client.send_request(
            "HEAD", endpoint, json_resp=False
        )
        location = urlparse(response.headers["Location"])
        object_key = location.path.lstrip("/")
        s3_object = self.s3client.get_object(
            Bucket="dandiarchive", Key=object_key
        )
        versioned = location._replace(
            query=f"versionId={s3_object['VersionId']}"
        )
        return urlunparse(versioned)