Example #1
def test_issue12(t, c_name):
    src = '''[
{"id": "1", "c1": 1, "c2": 2, "c3": 3 },
{"id": "2", "c1": 2, "c2": 2, "c3": 3 },
{"id": "3", "c1": 3, "c2": 3, "c3": 3 },
{"id": "4", "c1": 1, "c2": 1, "c3": 1 },
{"id": "5", "c1": 6, "c2": 6, "c3": 6 }
]'''
    #dataset.verbose_on() # DEBUG
    #dataset.use_strict_dotpath(True) # DEBUG
    if not dataset.status(c_name):
        if not dataset.init(c_name):
            err = dataset.error_message()
            t.error(f'failed to create {c_name}, {err}')
            return
    objects = json.loads(src)
    for obj in objects:
        key = obj['id']
        if dataset.has_key(c_name, key):
            dataset.update(c_name, key, obj)
        else:
            dataset.create(c_name, key, obj)
    f_names = dataset.frames(c_name)
    for f_name in f_names:
        ok = dataset.delete_frame(c_name, f_name)
        if not ok:
            err = dataset.error_message()
            t.error(f'Failed to delete {f_name} from {c_name} -> "{err}"')
            return
        if dataset.has_frame(c_name, f_name):
            t.error(
                f'Failed to delete frame {f_name} from {c_name}, frame still exists'
            )
            return
    f_name = 'issue12'
    dot_paths = [".c1", ".c3"]
    labels = [".col1", ".col3"]
    keys = dataset.keys(c_name)
    if not dataset.frame_create(c_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        t.error(f'failed to create {f_name} from {c_name}, {err}')
        return
    if not dataset.has_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected frame {f_name} to exist, {err}')
        return
    f_keys = dataset.frame_keys(c_name, f_name)
    if len(f_keys) == 0:
        err = dataset.error_message()
        t.error(f'expected keys in {f_name}, got zero, {err}')
        return
    f_objects = dataset.frame_objects(c_name, f_name)
    if len(f_objects) == 0:
        err = dataset.error_message()
        t.error(f'expected objects in {f_name}, got zero, {err}')
        return
    if not dataset.delete_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected to delete {f_name} in {c_name}, {err}')
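
The has_key/update/create sequence near the top of this test is a recurring upsert pattern in these examples; a minimal helper using the same py_dataset calls (the helper name is ours) could read:

def upsert(c_name, key, obj):
    # Update the record when the key exists, otherwise create it;
    # returns True on success, details via dataset.error_message().
    if dataset.has_key(c_name, key):
        return dataset.update(c_name, key, obj)
    return dataset.create(c_name, key, obj)
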
Example #2
def make_link_history(collection, resolver, url, note):
    """Make an entry in our link history collection"""
    now = datetime.today().isoformat()
    # Run checks on both resolver and final URL
    try:
        target = requests.get(url)
    except requests.exceptions.ConnectionError:
        target = requests.Response()
        target.status_code = 404
        target.url = ''
    if target.status_code != 200:
        print(
            f"Target URL {url} returns Error status code {target.status_code}")
    if links_differ(target.url, url):
        print(f"Target URL '{url}' redirects to '{target.url}'")
    try:
        get = requests.get(f"https://resolver.library.caltech.edu/{resolver}")
    except requests.exceptions.ConnectionError:
        get = requests.Response()
        get.status_code = 404
        get.url = ''
    if links_differ(get.url, url):
        print(f"Mismatch between expected url '{url}' and actual '{get.url}'")
    if get.status_code != 200:
        print(
            f"Resolver URL ({resolver}) '{get.url}' returns Error status code {get.status_code}"
        )
    entry = {
        "expected-url": url,
        "url": get.url,
        "modified": now,
        "code": get.status_code,
        "note": note,
    }
    # If existing, push into history
    if dataset.has_key(collection, resolver):
        existing, err = dataset.read(collection, resolver)
        if err != "":
            print(err)
            exit()
        if save_history(existing, url, get):
            past_history = existing.pop("history")
            past_history.append(existing)
            entry["history"] = past_history
            if not dataset.update(collection, resolver, entry):
                print(dataset.error_message())
                exit()
    else:
        entry["history"] = []
        if not dataset.create(collection, resolver, entry):
            print(dataset.error_message())
            exit()
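
links_differ and save_history are defined elsewhere in this script; a plausible sketch of links_differ, assuming it only ignores scheme and trailing-slash differences (the real helper may be stricter):

def links_differ(a, b):
    # Hypothetical sketch: URLs count as the same link when they differ
    # only by scheme or a trailing slash.
    def norm(u):
        return u.replace("https://", "").replace("http://", "").rstrip("/")
    return norm(a) != norm(b)
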
Example #3
def test_join(t, collection_name):
    key = "test_join1"
    obj1 = {"one": 1}
    obj2 = {"two": 2}
    if not dataset.status(collection_name):
        t.error("Failed, collection status is False,", collection_name)
        return
    err = ''
    if dataset.has_key(collection_name, key):
        ok = dataset.update(collection_name, key, obj1)
    else:
        ok = dataset.create(collection_name, key, obj1)
    if not ok:
        err = dataset.error_message()
        t.error(
            f'Failed, could not add record for test ({collection_name}, {key}, {obj1}), {err}'
        )
        return
    if not dataset.join(collection_name, key, obj2, overwrite=False):
        err = dataset.error_message()
        t.error(
            f'Failed, join for {collection_name}, {key}, {obj2}, overwrite = False -> {err}'
        )
    obj_result, err = dataset.read(collection_name, key)
    if err != '':
        t.error(f'Unexpected error for {key} in {collection_name}, {err}')
    if obj_result.get('one') != 1:
        t.error(f'Failed to join append key {key}, {obj_result}')
    if obj_result.get("two") != 2:
        t.error(f'Failed to join append key {key}, {obj_result}')
    obj2['one'] = 3
    obj2['two'] = 3
    obj2['three'] = 3
    if not dataset.join(collection_name, key, obj2, overwrite=True):
        err = dataset.error_message()
        t.error(
            f'Failed to join overwrite {collection_name}, {key}, {obj2}, overwrite = True -> {err}'
        )
    obj_result, err = dataset.read(collection_name, key)
    if err != '':
        t.error(f'Unexpected error for {key} in {collection_name}, {err}')
    for k in obj_result:
        if k != '_Key' and obj_result[k] != 3:
            t.error('Failed to update value in join overwrite', k, obj_result)
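
For reference, the merge semantics this test expects from dataset.join, sketched in plain Python (not the library's implementation):

def join_dicts(existing, new, overwrite=False):
    # overwrite=False only fills in keys missing from existing;
    # overwrite=True replaces existing values as well.
    for k, v in new.items():
        if overwrite or k not in existing:
            existing[k] = v
    return existing
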
Example #4
def match_codemeta():
    collection = "github_records.ds"
    keys = dataset.keys(collection)
    for k in keys:
        existing, err = dataset.read(collection, k)
        if err != "":
            print(f"Unexpected error on read: {err}")
        if "completed" not in existing:
            print("Processing new record ", k)
            if dataset.attachments(collection, k):
                dataset.detach(collection, k)

                # Update CaltechDATA
                token = os.environ["TINDTOK"]

                with open("codemeta.json", "r") as infile:
                    raw = infile.read()
                try:
                    meta = json.loads(raw)
                except ValueError:
                    print("Invalid json file - Skipping forever ", k)
                else:
                    standardized = codemeta_to_datacite(meta)

                    # Check that all records have a GitHub subject tag
                    add = True
                    for s in standardized["subjects"]:
                        if s["subject"] == "Github":
                            add = False
                        if s["subject"] == "GitHub":
                            add = False
                    if add:
                        standardized["subjects"].append({"subject": "GitHub"})
                    response = caltechdata_edit(token, k, standardized, {}, {}, True)
                    print(response)
                os.remove("codemeta.json")

            existing["completed"] = "True"
            if not dataset.update(collection, k, existing):
                err = dataset.error_message()
                print(f"Unexpected error on read: {err}")
Example #5
def migrate_attachment(c_name, key):
    obj, err = dataset.read(c_name, key)
    obj_path = dataset.path(c_name, key).replace(key + ".json", "")
    tarball = os.path.join(obj_path, key + ".tar")
    if os.path.exists(tarball):
        tar = tarfile.open(tarball)
        tar.extractall()
        tar.close()
        files = os.listdir()
        # Prune _Attachments from object and resave
        if "_Attachments" in obj:
            del obj["_Attachments"]
            err = dataset.update(c_name, key, obj)
            if err != "":
                print(f"Can't remove _Attachments metadata, {err}")
                sys.exit(1)
        for fname in files:
            print(".", end="")
            reattach(c_name, key, "v0.0.0", fname)
            os.remove(fname)
        # NOTE: if all re-attached then we need to remove tarball too
        os.remove(tarball)
        sys.stdout.flush()
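
migrate_attachment extracts the tarball into the current working directory and then lists it, which can pick up unrelated files; a stdlib-only sketch of the same step isolated in a scratch directory (names are ours):

import os
import tarfile
import tempfile

def extract_to_scratch(tarball):
    # Extract into a throwaway directory so os.listdir() only sees the
    # tarball's members; returns (directory, member file names).
    workdir = tempfile.mkdtemp(prefix="migrate_")
    with tarfile.open(tarball) as tar:
        tar.extractall(path=workdir)
    return workdir, os.listdir(workdir)
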
Example #6
def read_records(data, current, collection):
    # read records in 'hits' structure
    for record in data:
        rid = str(record["id"])
        metadata = record["metadata"]
        download = False  # Flag for downloading files
        # Do we need to download?
        if "electronic_location_and_access" in metadata:
            # Get information about already backed up files:
            existing_size = []
            existing_names = []
            if rid in current:
                # Get existing files
                attachments = dataset.attachments(collection, rid)
                for a in attachments:
                    split = a.split(" ")
                    # Handle file names with spaces; size will always be last
                    size = split[-1]
                    name = a.replace(f' {size}', '')
                    existing_names.append(name)
                    existing_size.append(size)
            # Look at all files
            count = len(metadata["electronic_location_and_access"])
            dl = 0
            for erecord in metadata["electronic_location_and_access"]:
                # Check if file has been downloaded
                size = erecord["file_size"]
                name = erecord["electronic_name"][0]
                if size in existing_size and name in existing_names:
                    dl = dl + 1
            if dl == count:
                print(
                    "files already downloaded ",
                    existing_size,
                    existing_names,
                )
                download = False
            else:
                print("file mismatch ", existing_size, existing_names, dl,
                      count)
                download = True

        # Save results in dataset
        print("Saving record " + rid)

        if rid in current:
            print('Update')
            if not dataset.update(collection, rid, record):
                print(
                    f"Failed, could not update record: {dataset.error_message()}"
                )
                exit()
        else:
            create = dataset.create(collection, rid, record)
            print('CREATED', create, rid)
            if not create:
                print(
                    f"Failed, could not create record: {dataset.error_message()}"
                )
                exit()

        if download:
            files = []

            print("Downloading files for ", rid)

            for erecord in metadata["electronic_location_and_access"]:
                f = download_file(erecord, rid)
                if f is not None:
                    files.append(f)

            print(files)
            print("Attaching files")

            if len(files) != 0:
                ok = dataset.attach(collection, rid, files)
                if not ok:
                    print(f"Failed on attach {dataset.error_message()}")
                    exit()

            for f in files:
                os.remove(f)
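
download_file is defined elsewhere; a plausible sketch that streams each file to disk and returns the local name, assuming the MARC-style uniform_resource_identifier field carries the URL (field names and error handling are assumptions):

def download_file(erecord, rid):
    # Hypothetical sketch: stream the remote file to disk, return its
    # local name, or None when the transfer fails.
    url = erecord["uniform_resource_identifier"]
    name = erecord["electronic_name"][0]
    r = requests.get(url, stream=True)
    if r.status_code != 200:
        return None
    with open(name, "wb") as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
    return name
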
Example #7
def get_crossref_refs(prefix, done=False, new=True):
    # new=True downloads everything from scratch and deletes any existing records

    collection = "crossref_refs.ds"

    if new and os.path.exists(collection):
        shutil.rmtree(collection)

    if not os.path.isdir(collection):
        if not dataset.init(collection):
            print("Dataset failed to init collection")
            exit()

    base_url = (
        "https://api.eventdata.crossref.org/v1/events?mailto=[email protected]&source=crossref&obj-id.prefix="
        + prefix)

    collected = dataset.has_key(collection, "captured")

    cursor = ""
    count = 0
    while cursor is not None:
        if collected:
            date, err = dataset.read(collection, "captured")
            if err != "":
                print("error on read: " + err)
            date = date["captured"]
            print(date)
            url = base_url + "&from-collected-date=" + date
        else:
            url = base_url
        if cursor != "":
            url = url + "&cursor=" + cursor
        print(url)
        r = requests.get(url)
        records = r.json()
        if records["status"] == "failed":
            print(records)
            break
        for rec in records["message"]["events"]:
            # Save results in dataset
            print(count, rec["id"])
            count = count + 1  # Also drives the total-results check below
            if not dataset.create(collection, rec["id"], rec):
                err = dataset.error_message()
                print("Error in saving record: " + err)

        if cursor == records["message"]["next-cursor"]:
            # Catches the case where the same cursor comes back at the end of results
            break
        if records["message"]["total-results"] > count:
            cursor = records["message"]["next-cursor"]
        else:
            cursor = None

    if collected:
        date, err = dataset.read(collection, "captured")
        if err != "":
            print("Error in reading date: " + err)
        date = date["captured"]

        # Check Deleted
        cursor = ""
        while cursor is not None:
            del_url = "https://api.eventdata.crossref.org/v1/events/deleted?mailto=[email protected]&source=crossref"
            full = del_url + "&from-collected-date=" + date + "&cursor=" + cursor
            r = requests.get(full)
            records = r.json()
            for rec in records["message"]["events"]:
                # Delete results in dataset
                print("Deleted: ", rec["id"])
                if not dataset.delete(collection, rec["id"]):
                    err = dataset.error_message()
                    print(f"Unexpected error on read: {err}")
            cursor = records["message"]["next-cursor"]

        # Check Edited
        cursor = ""
        while cursor is not None:
            edit_url = "https://api.eventdata.crossref.org/v1/events/edited?mailto=[email protected]&source=crossref"
            full = edit_url + "&from-collected-date=" + date + "&cursor=" + cursor
            r = requests.get(full)
            records = r.json()
            for rec in records["message"]["events"]:
                # Update results in dataset
                print("Update: ", rec["id"])
                if not dataset.update(collection, rec["id"], rec):
                    err = dataset.error_message()
                    print(f"Unexpected error on write: {err}")
            cursor = records["message"]["next-cursor"]

    if done:
        date = datetime.date.today().isoformat()
        record = {"captured": date}
        if dataset.has_key(collection, "captured"):
            if not dataset.update(collection, "captured", record):
                err = dataset.error_message()
                print(f"Unexpected error on update: {err}")
        else:
            if not dataset.create(collection, "captured", record):
                err = dataset.error_message()
                print(f"Unexpected error on create: {err}")
Example #8
#
# Loop through the keys, fetch each record, and add _State: "deposit" to
# each object.
#

c_name = "people.ds"
keys = dataset.keys(c_name)
#print(f"DEBUG Keys: {keys}")
for key in keys:
    print(f"Fixing key {key}")
    data, err = dataset.read(c_name, key)
    if err != "":
        print(f"Error read {c_name} -> {key}, {err}")
        sys.exit(1)
    dt = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    obj = {
        "_Key": key,
        "_State": "deposit",
        "_Updated": f"{dt}",
        "_Created": f"{dt}"
    }
    # Make field names lower case, skipping any with embedded spaces
    for field in data:
        fkey = field.lower()
        if ' ' not in fkey:
            obj[fkey] = data[field]
    err = dataset.update(c_name, key, obj)
    if err != "":
        print(f"Error write {c_name} -> {key}, {err}")
        sys.exit(1)
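
For reference, the corrected timestamp format yields values like this:

from datetime import datetime

# '2020-01-02 15:04:05' -- %H:%M:%S, not the 12-hour %I, gives sortable times.
datetime(2020, 1, 2, 15, 4, 5).strftime('%Y-%m-%d %H:%M:%S')
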
Example #9
# crawl docs_dir and ingest files into data collection.
for path, folders, files in os.walk(docs_dir):
    #log.print(f"Processing {path}")
    for filename in files:
        if filename.endswith(".md"):
            f_name = os.path.join(path, filename)
            log.print(f"Ingesting {f_name}")
            metadata = frontmatter(f_name)
            with open(f_name) as f:
                src = f.read()
            if "id" in metadata:
                key = str(metadata["id"])
                if dataset.has_key(c_name, key):
                    err = dataset.update(c_name, key, {
                        "metadata": metadata,
                        "content": f_name,
                        "src": src
                    })
                else:
                    err = dataset.create(c_name, key, {
                        "metadata": metadata,
                        "content": f_name,
                        "src": src
                    })
                if err != "":
                    log.fatal(err)
            else:
                log.print(f"Warning, no front matter for {f_name}")

# for each dataset record render appropriate HTML pages
keys = dataset.keys(c_name)
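
frontmatter is defined elsewhere; a plausible sketch that parses YAML front matter delimited by leading --- fences (assumes PyYAML; the real helper may differ):

import yaml

def frontmatter(f_name):
    # Hypothetical sketch: return the YAML block between the opening
    # "---" fences, or an empty dict when there is none.
    with open(f_name) as f:
        src = f.read()
    if not src.startswith("---"):
        return {}
    _, block, _ = src.split("---", 2)
    return yaml.safe_load(block) or {}
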
Example #10
    keys = dataset.keys(profile_ds)
    labels = ["orcid", "creator_id"]
    dot_paths = [".orcid", ".creator_id"]

    all_metadata = get_records(dot_paths, "profile", profile_ds, keys, labels)
    for profile in all_metadata:
        if "creator_id" in profile:
            idv = profile["creator_id"]
        else:
            print("ERROR", profile)
            continue
        for person in people:
            if person["Authors_ID"] != "":
                if person["Authors_ID"] == idv:
                    if person["ORCID"] == "":
                        person["ORCID"] = profile["orcid"]
                        dataset.update(import_coll, person["CL_PEOPLE_ID"], person)
                        print("Updated ", person["CL_PEOPLE_ID"])
                    elif person["ORCID"] != profile["orcid"]:
                        print(
                            "Inconsistent ORCIDS for ",
                            person["CL_PEOPLE_ID"],
                            person["ORCID"],
                            "CaltechAUTHORS",
                            profile["orcid"],
                        )

    # TODO - port to python
    # Run on command line
    # dataset frame -all imported.ds gsheet-sync ._Key .ORCID
    # dataset frame-labels imported.ds gsheet-sync "CL_PEOPLE_ID" "ORCID"
    # dataset sync-send imported.ds gsheet-sync 1ZI3-XvQ_3rLcKrF-4FBa2tEInIdQfOnGJ9L_NmhmoGs CaltechPEOPLE
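
A partial sketch of porting the first two commands, using the frame calls shown in Example #1 (sync-send has no Python binding in these examples, so only the frame step is sketched):

f_name = "gsheet-sync"
keys = dataset.keys(import_coll)
if not dataset.frame_create(import_coll, f_name, keys,
                            ["._Key", ".ORCID"],
                            ["CL_PEOPLE_ID", "ORCID"]):
    print(dataset.error_message())
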
Example #11
            }

            #assert schema40.validate(metadata)
            #Debugging if this fails
            #v = schema40.validator.validate(metadata)
            #errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
            #for error in errors:
            #    print(error.message)
            xml = schema40.tostring(metadata)
            result = d.metadata_post(xml)
            identifier = result.split('(')[1].split(')')[0]
            d.doi_post(identifier, inputv['url'])
            print('Completed ' + identifier)

            inputv['doi'] = identifier
            err = dataset.update(collection, key, inputv)

            token = os.environ['MAILTOK']

            email = inputv['email']
            url = inputv['url']

            send_simple_message(token, email, identifier, url)

        else:
            print("Web archiving is not complete for " + inputv['name'])

dot_exprs = [
    '.email', '.url', '.title', '.author', '.affiliation', '.orcid',
    '.license', '.prefix', '.archive_complete', '.doi'
]
column_names = [
    'email', 'url', 'title', 'author', 'affiliation', 'orcid', 'license',
Example #12
def aggregate_usage(usage_collection, month_collection):
    keys = dataset.keys(usage_collection)
    keys.remove("end-date")
    for k in progressbar(keys):
        record, err = dataset.read(usage_collection, k)
        if err != "":
            print(err)
        use = {}
        views = {}
        for usage in record["performance"]:
            split = usage["period"].split("-")
            month = split[0] + "-" + split[1]
            for u in usage["instance"]:
                metric = u["metric-type"]
                if metric == "unique-dataset-requests":
                    if month in use:
                        use[month] += u["count"]
                    else:
                        use[month] = u["count"]
                if metric == "unique-dataset-investigations":
                    if month in views:
                        views[month] += u["count"]
                    else:
                        views[month] = u["count"]
        # Strip non-counter stuff
        record.pop("_Key")
        record.pop("grand-total-unique-requests")
        record.pop("grand-total-unique-investigations")
        # go across months
        for view in views:
            split = view.split("-")
            date_obj = datetime(int(split[0]), int(split[1]), 1)
            d_range = get_month_day_range(date_obj)
            performance = [
                {
                    "period": {
                        "begin-date": d_range[0].date().isoformat(),
                        "end-date": d_range[1].date().isoformat(),
                    },
                    "instance": [],
                }
            ]
            v = views[view]
            performance[0]["instance"].append(
                {
                    "count": v,
                    "metric-type": "unique-dataset-investigations",
                    "access-method": "regular",
                }
            )
            # Handle when we have both views and uses in a given month
            if view in use:
                u = use[view]
                performance[0]["instance"].append(
                    {
                        "count": u,
                        "metric-type": "unique-dataset-requests",
                        "access-method": "regular",
                    }
                )
            existing, err = dataset.read(month_collection, view)
            if err != "":
                print(err)
            record["performance"] = performance
            existing["report-datasets"].append(record)
            if not dataset.update(month_collection, view, existing):
                err = dataset.error_message()
                print(err)
        for use_date in use:
            # We only have use-only records left to handle
            if use_date not in views:
                u = use[use_date]
                split = use_date.split("-")
                date_obj = datetime(int(split[0]), int(split[1]), 1)
                d_range = get_month_day_range(date_obj)
                performance = [
                    {
                        "period": {
                            "begin-date": d_range[0].date().isoformat(),
                            "end-date": d_range[1].date().isoformat(),
                        },
                        "instance": [
                            {
                                "count": u,
                                "metric-type": "unique-dataset-requests",
                                "access-method": "regular",
                            }
                        ],
                    }
                ]
                existing, err = dataset.read(month_collection, use_date)
                if err != "":
                    print(err)
                record["performance"] = performance
                existing["report-datasets"].append(record)
                if not dataset.update(month_collection, use_date, existing):
                    err = dataset.error_message()
                    print(err)
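
get_month_day_range is defined elsewhere; a plausible stdlib sketch returning the first and last day of a month as datetimes, matching the d_range[0]/d_range[1] use above:

import calendar
from datetime import datetime

def get_month_day_range(date_obj):
    # Hypothetical sketch: (first day, last day) of date_obj's month.
    last = calendar.monthrange(date_obj.year, date_obj.month)[1]
    return (datetime(date_obj.year, date_obj.month, 1),
            datetime(date_obj.year, date_obj.month, last))
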
Example #13
def get_usage(usage_collection, mapping, token):
    """Collect usage into a usage object for items in CaltechDATA"""

    # Find time periods
    datev, err = dataset.read(usage_collection, "end-date")
    new_start = datetime.fromtimestamp(datev["end-date"])
    now = datetime.now().timestamp()
    # minutes in range
    minutes_diff = math.ceil(
        (datetime.fromtimestamp(now) - new_start).total_seconds() / 60.0
    )

    # Get number of visitors since last harvest
    stats_url_base = "https://stats.tind.io/index.php?module=API&method=Live.getCounters&idSite=1161&format=JSON"

    token_s = "&token_auth=" + token

    stats_url = f"{stats_url_base}{token_s}&lastMinutes={minutes_diff}"
    response = requests.get(stats_url)
    if response.status_code != 200:
        print(response.text)
        print(stats_url)
    visitors = response.json()[0]["visits"]

    print(visitors)
    visit_url_base = "https://stats.tind.io/index.php?module=API&method=Live.getLastVisitsDetails&idSite=1161&format=json&filter_limit=1000"

    print("Getting usage")
    usage = []
    # We will page through visitors in chunks of 1000
    chunks = math.ceil(int(visitors) / 1000)
    if chunks > 1:
        for c in progressbar(range(chunks)):
            url = f"{visit_url_base}{token_s}&filter_limit=1000&filter_offset={c*1000}"
            usage += process_visits(url, mapping)
    else:
        url = f"{visit_url_base}{token_s}&filter_limit={visitors}"
        usage = process_visits(url, mapping)

    print("Writing usage")
    for use in progressbar(usage):
        date = use["date"]
        if "downloads" in use and "views" in use:
            records = use["views"].union(use["downloads"])
        elif "views" in use:
            records = use["views"]
        else:
            records = use["downloads"]
        for rec in records:
            data, err = dataset.read(usage_collection, rec)
            if err == "":
                # We only track usage from live records
                instance = {"instance": [], "period": date}
                if "views" in use:
                    if rec in use["views"]:
                        instance["instance"].append(
                            {
                                "access-method": "regular",
                                "count": 1,
                                "metric-type": "unique-dataset-investigations",
                            }
                        )
                        # print(data,rec)
                        data["grand-total-unique-investigations"] += 1
                if "downloads" in use:
                    if rec in use["downloads"]:
                        instance["instance"].append(
                            {
                                "access-method": "regular",
                                "count": 1,
                                "metric-type": "unique-dataset-requests",
                            }
                        )
                        data["grand-total-unique-requests"] += 1
                data["performance"].append(instance)
                dataset.update(usage_collection, rec, data)

    dataset.update(usage_collection, "end-date", {"end-date": now})
Example #14
def get_wos_refs(new=True):
    # new=True downloads everything from scratch and deletes any existing records

    collection = "all_wos.ds"

    if new and os.path.exists(collection):
        shutil.rmtree(collection)

    if not os.path.isdir(collection):
        if not dataset.init(collection):
            print("Dataset failed to init collection")
            exit()

    # Get access token from WOS, set as an environment variable with source token.bash
    token = os.environ["WOSTOK"]

    headers = {"X-ApiKey": token, "Content-type": "application/json"}

    # Run query to get scope of records

    base_url = "https://api.clarivate.com/api/wos/?databaseId=WOK"

    collected = dataset.has_key(collection, "captured")

    if collected:
        date, err = dataset.read(collection, "captured")
        date = date["captured"]
        date = datetime.fromisoformat(date)
        current = datetime.today()
        diff = current - date
        base_url = base_url + "&loadTimeSpan=" + str(diff.days) + "D"

    date = datetime.today().isoformat()
    record = {"captured": date}
    if dataset.has_key(collection, "captured"):
        err = dataset.update(collection, "captured", record)
        if err != "":
            print(f"Unexpected error on update: {err}")
    else:
        err = dataset.create(collection, "captured", record)
        if err != "":
            print(f"Unexpected error on create: {err}")

    query = "OG=(California Institute of Technology)"
    query = urllib.parse.quote_plus(query)
    url = base_url + "&usrQuery=" + query + "&count=100&firstRecord=1"

    response = requests.get(url, headers=headers)
    response = response.json()
    record_count = response["QueryResult"]["RecordsFound"]
    print(record_count, " Records from WOS")
    query_id = response["QueryResult"]["QueryID"]
    try:
        records = response["Data"]["Records"]["records"]["REC"]
    except KeyError:
        print(response)
        return
    write_records(records, collection)
    # We have saved the first 100 records
    record_start = 101
    record_count = record_count - 100

    query_url = "https://api.clarivate.com/api/wos/query/"

    while record_count > 0:
        print(record_count)
        print(len(records), "records")
        if record_count > 100:
            url = (
                query_url
                + str(query_id)
                + "?count=100&firstRecord="
                + str(record_start)
            )
            response = requests.get(url, headers=headers)
            response = response.json()
            try:
                records = response["Records"]["records"]["REC"]
            except KeyError:
                print(response)
                break
            write_records(records, collection)
            record_start = record_start + 100
            record_count = record_count - 100
        else:
            url = (
                query_url
                + str(query_id)
                + "?count="
                + str(record_count)
                + "&firstRecord="
                + str(record_start)
            )
            response = requests.get(url, headers=headers)
            response = response.json()
            records = response["Records"]["records"]["REC"]
            write_records(records, collection)
            record_count = 0

    print("Downloaded all records ")
Example #15
def test_attachments(t, collection_name):
    t.print("Testing attach, attachments, detach and prune")
    # Generate two files to attach.
    with open('a1.txt', 'w') as text_file:
        text_file.write('This is file a1')
    with open('a2.txt', 'w') as text_file:
        text_file.write('This is file a2')
    filenames = ['a1.txt', 'a2.txt']

    if not dataset.status(collection_name):
        t.error("Failed,", collection_name, "missing")
        return
    keys = dataset.keys(collection_name)
    if len(keys) < 1:
        t.error("Failed,", collection_name, "should have keys")
        return

    key = keys[0]
    if not dataset.attach(collection_name, key, filenames):
        err = dataset.error_message()
        t.error("Failed, to attach files for", collection_name, key, filenames,
                ', ', err)
        return

    l = dataset.attachments(collection_name, key)
    if len(l) != 2:
        t.error("Failed, expected two attachments for", collection_name, key,
                "got", l)
        return

    # Check that attachments aren't impacted by update
    if not dataset.update(collection_name, key, {"testing": "update"}):
        err = dataset.error_message()
        t.error("Failed, to update record", collection_name, key, err)
        return
    l = dataset.attachments(collection_name, key)
    if len(l) != 2:
        t.error("Failed, expected two attachments after update for",
                collection_name, key, "got", l)
        return

    if os.path.exists(filenames[0]):
        os.remove(filenames[0])
    if os.path.exists(filenames[1]):
        os.remove(filenames[1])

    # First try detaching one file.
    if not dataset.detach(collection_name, key, [filenames[1]]):
        err = dataset.error_message()
        t.error("Failed, expected True for", collection_name, key,
                filenames[1], ', ', err)
    if os.path.exists(filenames[1]):
        os.remove(filenames[1])
    else:
        t.error("Failed to detch", filenames[1], "from", collection_name, key)

    # Test explicit filenames detach
    if not dataset.detach(collection_name, key, filenames):
        err = dataset.error_message()
        t.error("Failed, expected True for", collection_name, key, filenames,
                ', ', err)

    for fname in filenames:
        if os.path.exists(fname):
            os.remove(fname)
        else:
            t.error("Failed, expected", fname, "to be detached from",
                    collection_name, key)

    # Test detaching all files
    if not dataset.detach(collection_name, key, []):
        err = dataset.error_message()
        t.error("Failed, expected True for (detaching all)", collection_name,
                key, ', ', err)
    for fname in filenames:
        if os.path.exists(fname):
            os.remove(fname)
        else:
            t.error("Failed, expected", fname, "for detaching all from",
                    collection_name, key)

    if not dataset.prune(collection_name, key, [filenames[0]]):
        err = dataset.error_message()
        t.error("Failed, expected True for prune", collection_name, key,
                [filenames[0]], ', ', err)
    l = dataset.attachments(collection_name, key)
    if len(l) != 1:
        t.error("Failed, expected one file after prune for", collection_name,
                key, [filenames[0]], "got", l)

    if not dataset.prune(collection_name, key, []):
        err = dataset.error_message()
        t.error("Failed, expected True for prune (all)", collection_name, key,
                ', ', err)
    l = dataset.attachments(collection_name, key)
    if len(l) != 0:
        t.error("Failed, expected zero files after prune for", collection_name,
                key, "got", l)
Example #16
def test_basic(t, collection_name):
    '''test_basic(collection_name) runs tests on basic CRUD ops'''
    # Setup a test record
    key = "2488"
    value = {
        "title":
        "Twenty Thousand Leagues Under the Seas: An Underwater Tour of the World",
        "formats": ["epub", "kindle", "plain text"],
        "authors": [{
            "given": "Jules",
            "family": "Verne"
        }],
        "url": "https://www.gutenberg.org/ebooks/2488"
    }

    # We should have an empty collection, we will create our test record.
    if not dataset.create(collection_name, key, value):
        err = dataset.error_message()
        t.error(f'create({collection_name}, {key}, {value}) failed, {err}')
        return

    # Check to see that we have only one record
    key_count = dataset.count(collection_name)
    if key_count != 1:
        t.error(f"Failed, expected count to be 1, got {key_count}")

    # Do a minimal test to see if the record looks like it has content
    keyList = dataset.keys(collection_name)
    rec, err = dataset.read(collection_name, key)
    if err != "":
        t.error(f"Unexpected error for {key} in {collection_name}, {err}")
    for k, v in value.items():
        if not isinstance(v, list):
            if k in rec and rec[k] == v:
                t.print("OK, found", k, " -> ", v)
            else:
                t.error(f"epxected {rec[k]} got {v}")
        else:
            if k == "formats" or k == "authors":
                t.print("OK, expected lists for", k, " -> ", v)
            else:
                t.error(f"Failed, expected {k} with list v, got {v}")

    # Test updating record
    value["verified"] = True
    if not dataset.update(collection_name, key, value):
        err = dataset.error_message()
        t.error(f"update({collection_name}, {key}, {value}) failed, {err}")
    rec, err = dataset.read(collection_name, key)
    if err != "":
        t.error(f"Unexpected error for {key} in {collection_name}, {err}")
    for k, v in value.items():
        if not isinstance(v, list):
            if k in rec and rec[k] == v:
                t.print("OK, found", k, " -> ", v)
            else:
                t.error("expected {rec[k]} got {v} for key {k}")
        else:
            if k == "formats" or k == "authors":
                t.print("OK, expected lists for", k, " -> ", v)
            else:
                t.error("Failed, expected {k} with a list for v, got {v}")

    # Test path to record
    expected_s = "/".join(
        [collection_name, "pairtree", "24", "88", (key + ".json")])
    expected_l = len(expected_s)
    p = dataset.path(collection_name, key)
    if len(p) != expected_l:
        t.error("Failed, expected length", expected_l, "got", len(p))
    if p != expected_s:
        t.error("Failed, expected", expected_s, "got", p)

    # Test listing records
    l = dataset.list(collection_name, [key])
    if len(l) != 1:
        t.error(
            f"list({collection_name}, [{key}]) failed, list should return an array of one record, got",
            l)
        return

    # test deleting a record
    if not dataset.delete(collection_name, key):
        err = dataset.error_message()
        t.error("Failed, could not delete record", key, ", ", err)