def test_issue12(t, c_name):
    src = '''[
  {"id": "1", "c1": 1, "c2": 2, "c3": 3 },
  {"id": "2", "c1": 2, "c2": 2, "c3": 3 },
  {"id": "3", "c1": 3, "c2": 3, "c3": 3 },
  {"id": "4", "c1": 1, "c2": 1, "c3": 1 },
  {"id": "5", "c1": 6, "c2": 6, "c3": 6 }
]'''
    #dataset.verbose_on() # DEBUG
    #dataset.use_strict_dotpath(True) # DEBUG
    if dataset.status(c_name) == False:
        if not dataset.init(c_name):
            err = dataset.error_message()
            t.error(f'failed to create {c_name}, {err}')
            return
    objects = json.loads(src)
    for obj in objects:
        key = obj['id']
        if dataset.has_key(c_name, key):
            dataset.update(c_name, key, obj)
        else:
            dataset.create(c_name, key, obj)
    f_names = dataset.frames(c_name)
    for f_name in f_names:
        ok = dataset.delete_frame(c_name, f_name)
        if ok == False:
            err = dataset.error_message()
            t.error(f'Failed to delete {f_name} from {c_name} -> "{err}"')
            return
        if dataset.has_frame(c_name, f_name) == True:
            t.error(f'Failed to delete frame {f_name} from {c_name}, frame still exists')
            return
    f_name = 'issue12'
    dot_paths = [".c1", "c3"]
    labels = [".col1", ".col3"]
    keys = dataset.keys(c_name)
    if not dataset.frame_create(c_name, f_name, keys, dot_paths, labels):
        err = dataset.error_message()
        t.error(f'failed to create {f_name} from {c_name}, {err}')
    if not dataset.has_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected frame {f_name} to exist, {err}')
        return
    f_keys = dataset.frame_keys(c_name, f_name)
    if len(f_keys) == 0:
        err = dataset.error_message()
        t.error(f'expected keys in {f_name}, got zero, {err}')
        return
    f_objects = dataset.frame_objects(c_name, f_name)
    if len(f_objects) == 0:
        err = dataset.error_message()
        t.error(f'expected objects in {f_name}, got zero, {err}')
        return
    if not dataset.delete_frame(c_name, f_name):
        err = dataset.error_message()
        t.error(f'expected to delete {f_name} in {c_name}, {err}')
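# The test functions in this file all expect a small harness object `t`
# with print() and error() methods. The real harness is not shown here;
# the class below is only a minimal sketch of what such an object might
# look like, so the tests can be read (and run) in isolation.
class TestHarness:
    def __init__(self, name):
        self.name = name
        self.error_count = 0

    def print(self, *args):
        print(self.name + ':', *args)

    def error(self, *args):
        self.error_count += 1
        print(self.name + ' ERROR:', *args)

# e.g. t = TestHarness('test_issue12'); test_issue12(t, 'test_issue12.ds')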
def make_link_history(collection, resolver, url, note):
    """Make an entry in our link history collection"""
    now = datetime.today().isoformat()
    # Run checks on both resolver and final URL
    try:
        target = requests.get(url)
    except requests.exceptions.ConnectionError:
        target = requests.Response()
        target.status_code = 404
        target.url = ''
    if target.status_code != 200:
        print(f"Target URL {url} returns Error status code {target.status_code}")
    if links_differ(target.url, url):
        print(f"Target URL '{url}' redirects to '{target.url}'")
    try:
        get = requests.get(f"https://resolver.library.caltech.edu/{resolver}")
    except requests.exceptions.ConnectionError:
        get = requests.Response()
        get.status_code = 404
        get.url = ''
    if links_differ(get.url, url):
        print(f"Mismatch between expected url '{url}' and actual '{get.url}'")
    if get.status_code != 200:
        print(f"Resolver URL ({resolver}) '{get.url}' returns Error status code {get.status_code}")
    entry = {
        "expected-url": url,
        "url": get.url,
        "modified": now,
        "code": get.status_code,
        "note": note,
    }
    # If existing, push into history
    if dataset.has_key(collection, resolver):
        existing, err = dataset.read(collection, resolver)
        if err != "":
            print(err)
            exit()
        if save_history(existing, url, get):
            past_history = existing.pop("history")
            past_history.append(existing)
            entry["history"] = past_history
            if not dataset.update(collection, resolver, entry):
                print(dataset.error_message())
                exit()
    else:
        entry["history"] = []
        if not dataset.create(collection, resolver, entry):
            print(dataset.error_message())
            exit()
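# make_link_history() above depends on two helpers, links_differ() and
# save_history(), that are defined elsewhere. The sketches below are only
# guesses at their behavior, inferred from how they are called; the real
# implementations may differ.
def links_differ(link_a, link_b):
    # Assumed behavior: treat two URLs as the same when they differ only
    # by scheme or a trailing slash.
    def normalize(u):
        return u.replace("https://", "").replace("http://", "").rstrip("/")
    return normalize(link_a) != normalize(link_b)


def save_history(existing, url, get):
    # Assumed behavior: push the old entry into history only when the
    # resolved URL, status code, or expected URL has changed.
    return (
        links_differ(existing.get("url", ""), get.url)
        or existing.get("code") != get.status_code
        or existing.get("expected-url") != url
    )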
def test_join(t, collection_name):
    key = "test_join1"
    obj1 = {"one": 1}
    obj2 = {"two": 2}
    if dataset.status(collection_name) == False:
        t.error("Failed, collection status is False,", collection_name)
        return
    ok = dataset.has_key(collection_name, key)
    err = ''
    if ok == True:
        ok = dataset.update(collection_name, key, obj1)
    else:
        ok = dataset.create(collection_name, key, obj1)
    if ok == False:
        err = dataset.error_message()
        t.error(f'Failed, could not add record for test ({collection_name}, {key}, {obj1}), {err}')
        return
    if dataset.join(collection_name, key, obj2, overwrite=False) == False:
        err = dataset.error_message()
        t.error(f'Failed, join for {collection_name}, {key}, {obj2}, overwrite = False -> {err}')
    obj_result, err = dataset.read(collection_name, key)
    if err != '':
        t.error(f'Unexpected error for {key} in {collection_name}, {err}')
    if obj_result.get('one') != 1:
        t.error(f'Failed to join append key {key}, {obj_result}')
    if obj_result.get("two") != 2:
        t.error(f'Failed to join append key {key}, {obj_result}')
    obj2['one'] = 3
    obj2['two'] = 3
    obj2['three'] = 3
    if dataset.join(collection_name, key, obj2, overwrite=True) == False:
        err = dataset.error_message()
        t.error(f'Failed to join overwrite {collection_name}, {key}, {obj2}, overwrite = True -> {err}')
    obj_result, err = dataset.read(collection_name, key)
    if err != '':
        t.error(f'Unexpected error for {key} in {collection_name}, {err}')
    for k in obj_result:
        if k != '_Key' and obj_result[k] != 3:
            t.error('Failed to update value in join overwrite', k, obj_result)
def match_codemeta():
    collection = "github_records.ds"
    keys = dataset.keys(collection)
    for k in keys:
        existing, err = dataset.read(collection, k)
        if err != "":
            print(f"Unexpected error on read: {err}")
        if "completed" not in existing:
            print("Processing new record ", k)
            if dataset.attachments(collection, k) != "":
                dataset.detach(collection, k)

                # Update CaltechDATA
                token = os.environ["TINDTOK"]

                infile = open("codemeta.json", "r")
                try:
                    meta = json.load(infile)
                except:
                    print("Invalid json file - Skipping forever ", k)
                else:
                    standardized = codemeta_to_datacite(meta)

                    # Check that all records have a GitHub subject tag
                    add = True
                    for s in standardized["subjects"]:
                        if s["subject"] == "Github":
                            add = False
                        if s["subject"] == "GitHub":
                            add = False
                    if add == True:
                        standardized["subjects"].append({"subject": "GitHub"})
                    response = caltechdata_edit(token, k, standardized, {}, {}, True)
                    print(response)
                os.system("rm codemeta.json")

            existing["completed"] = "True"
            if not dataset.update(collection, k, existing):
                err = dataset.error_message()
                print(f"Unexpected error on update: {err}")
def migrate_attachment(c_name, key):
    obj, err = dataset.read(c_name, key)
    obj_path = dataset.path(c_name, key).replace(key + ".json", "")
    tarball = os.path.join(obj_path, key + ".tar")
    if os.path.exists(tarball):
        tar = tarfile.open(tarball)
        tar.extractall()
        tar.close()
        files = os.listdir()
        # Prune _Attachments from object and resave
        if "_Attachments" in obj:
            del obj["_Attachments"]
            err = dataset.update(c_name, key, obj)
            if err != "":
                print(f"Can't remove _Attachments metadata, {err}")
                sys.exit(1)
        for fname in files:
            print(".", end="")
            reattach(c_name, key, "v0.0.0", fname)
            os.remove(fname)
        # NOTE: if all re-attached then we need to remove tarball too
        os.remove(tarball)
        sys.stdout.flush()
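# A minimal way migrate_attachment() might be driven over a whole
# collection (a sketch only; the collection name below is a placeholder,
# not the collection actually used by this script):
if __name__ == "__main__":
    c_name = "example.ds"  # placeholder collection name
    for key in dataset.keys(c_name):
        migrate_attachment(c_name, key)
    print("")
    print("Done")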
def read_records(data, current, collection):
    # read records in 'hits' structure
    for record in data:
        rid = str(record["id"])
        metadata = record["metadata"]

        download = False  # Flag for downloading files

        # Do we need to download?
        if "electronic_location_and_access" in metadata:
            # Get information about already backed up files:
            existing_size = []
            existing_names = []
            if rid in current:
                # Get existing files
                attachments = dataset.attachments(collection, rid)
                for a in attachments:
                    split = a.split(" ")
                    # Handle file names with spaces; size will always be last
                    size = split[-1]
                    name = a.replace(f' {size}', '')
                    existing_names.append(name)
                    existing_size.append(size)
            # Look at all files
            count = len(metadata["electronic_location_and_access"])
            dl = 0
            for erecord in metadata["electronic_location_and_access"]:
                # Check if file has been downloaded
                size = erecord["file_size"]
                name = erecord["electronic_name"][0]
                if size in existing_size and name in existing_names:
                    dl = dl + 1
            if dl == count:
                print("files already downloaded ", existing_size, existing_names)
                download = False
            else:
                print("file mismatch ", existing_size, existing_names, dl, count)
                download = True

        # Save results in dataset
        print("Saving record " + rid)
        if rid in current:
            print('Update')
            update = dataset.update(collection, rid, record)
            if update == False:
                print(f"Failed, could not update record: {dataset.error_message()}")
                exit()
        else:
            create = dataset.create(collection, rid, record)
            print('CREATED', create, rid)
            if create == False:
                print(f"Failed, could not create record: {dataset.error_message()}")
                exit()

        if download == True:
            files = []
            print("Downloading files for ", rid)
            for erecord in metadata["electronic_location_and_access"]:
                f = download_file(erecord, rid)
                if f != None:
                    files.append(f)
            print(files)
            print("Attaching files")
            if len(files) != 0:
                err = dataset.attach(collection, rid, files)
                if err == False:
                    print(f"Failed on attach {dataset.error_message()}")
                    exit()
            for f in files:
                if f != None:
                    os.remove(f)
def get_crossref_refs(prefix, done=False, new=True):
    # New=True will download everything from scratch and delete any existing records
    collection = "crossref_refs.ds"

    if new == True:
        if os.path.exists(collection) == True:
            shutil.rmtree(collection)

    if os.path.isdir(collection) == False:
        if not dataset.init(collection):
            print("Dataset failed to init collection")
            exit()

    base_url = (
        "https://api.eventdata.crossref.org/v1/[email protected]&source=crossref&obj-id.prefix="
        + prefix)

    collected = dataset.has_key(collection, "captured")

    cursor = ""
    count = 0
    while cursor != None:
        if collected == True:
            date, err = dataset.read(collection, "captured")
            if err != "":
                print("error on read: " + err)
            date = date["captured"]
            print(date)
            url = base_url + "&from-collected-date=" + date
        else:
            url = base_url
        if cursor != "":
            url = url + "&cursor=" + cursor
        print(url)
        r = requests.get(url)
        records = r.json()
        if records["status"] == "failed":
            print(records)
            break
        for rec in records["message"]["events"]:
            # Save results in dataset
            print(count, rec["id"])  # Just for prettiness
            count = count + 1
            if not dataset.create(collection, rec["id"], rec):
                err = dataset.error_message()
                print("Error in saving record: " + err)
        if cursor == records["message"]["next-cursor"]:
            # Catches bug where we get the same cursor back at end of results
            break
        if records["message"]["total-results"] > count:
            cursor = records["message"]["next-cursor"]
        else:
            cursor = None

    if collected == True:
        date, err = dataset.read(collection, "captured")
        if err != "":
            print("Error in reading date: " + err)
        date = date["captured"]

        # Check Deleted
        cursor = ""
        while cursor != None:
            del_url = "https://api.eventdata.crossref.org/v1/events/[email protected]&source=crossref"
            full = del_url + "&from-collected-date=" + date + "&cursor=" + cursor
            r = requests.get(full)
            records = r.json()
            for rec in records["message"]["events"]:
                # Delete results in dataset
                print("Deleted: ", rec["id"])
                if not dataset.delete(collection, rec["id"]):
                    err = dataset.error_message()
                    print(f"Unexpected error on delete: {err}")
            cursor = records["message"]["next-cursor"]

        # Check Edited
        cursor = ""
        while cursor != None:
            del_url = "https://api.eventdata.crossref.org/v1/events/[email protected]&source=crossref"
            full = del_url + "&from-collected-date=" + date + "&cursor=" + cursor
            r = requests.get(full)
            records = r.json()
            for rec in records["message"]["events"]:
                # Update results in dataset
                print("Update: ", rec["id"])
                if not dataset.update(collection, rec["id"], rec):
                    err = dataset.error_message()
                    print(f"Unexpected error on write: {err}")
            cursor = records["message"]["next-cursor"]

    if done:
        date = datetime.date.today().isoformat()
        record = {"captured": date}
        if dataset.has_key(collection, "captured"):
            if not dataset.update(collection, "captured", record):
                err = dataset.error_message()
                print(f"Unexpected error on update: {err}")
        else:
            if not dataset.create(collection, "captured", record):
                err = dataset.error_message()
                print(f"Unexpected error on create: {err}")
#
# Loop through the keys, fetch the record and add a _State of "deposit" to
# each object.
#
c_name = "people.ds"
keys = dataset.keys(c_name)
#print(f"DEBUG Keys: {keys}")
for key in keys:
    print(f"Fixing key {key}")
    data, err = dataset.read(c_name, key)
    if err != "":
        print(f"Error read {c_name} -> {key}, {err}")
        sys.exit(1)
    # Make field names lower case
    dt = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    obj = {
        "_Key": key,
        "_State": "deposit",
        "_Updated": f"{dt}",
        "_Created": f"{dt}"
    }
    for field in data:
        fkey = field.lower()
        if ' ' not in fkey:
            obj[fkey] = data[field]
    err = dataset.update(c_name, key, obj)
    if err != "":
        print(f"Error write {c_name} -> {key}, {err}")
        sys.exit(1)
# crawl docs_dir and ingest files into data collection.
for path, folders, files in os.walk(docs_dir):
    #log.print(f"Processing {path}")
    for filename in files:
        if filename.endswith(".md"):
            f_name = os.path.join(path, filename)
            log.print(f"Ingesting {f_name}")
            metadata = frontmatter(f_name)
            with open(f_name) as f:
                src = f.read()
            if "id" in metadata:
                key = str(metadata["id"])
                if dataset.has_key(c_name, key):
                    err = dataset.update(c_name, key, {
                        "metadata": metadata,
                        "content": f_name,
                        "src": src
                    })
                else:
                    err = dataset.create(c_name, key, {
                        "metadata": metadata,
                        "content": f_name,
                        "src": src
                    })
                if err != "":
                    log.fatal(err)
            else:
                log.print(f"Warning, no front matter for {f_name}")

# for each dataset record render appropriate HTML pages
keys = dataset.keys(c_name)
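# The ingest loop above relies on a frontmatter() helper that is not shown
# here. A minimal sketch of what it might do (parse YAML front matter
# delimited by '---' lines into a dict), assuming PyYAML is available:
import yaml

def frontmatter(f_name):
    with open(f_name) as f:
        src = f.read()
    if src.startswith('---'):
        parts = src.split('---', 2)
        if len(parts) >= 3:
            return yaml.safe_load(parts[1]) or {}
    return {}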
keys = dataset.keys(profile_ds)
labels = ["orcid", "creator_id"]
dot_paths = [".orcid", ".creator_id"]
all_metadata = get_records(dot_paths, "profile", profile_ds, keys, labels)
for profile in all_metadata:
    if "creator_id" in profile:
        idv = profile["creator_id"]
    else:
        print("ERROR", profile)
        continue
    for person in people:
        if person["Authors_ID"] != "":
            if person["Authors_ID"] == idv:
                if person["ORCID"] == "":
                    person["ORCID"] = profile["orcid"]
                    dataset.update(import_coll, person["CL_PEOPLE_ID"], person)
                    print("Updated ", person["CL_PEOPLE_ID"])
                elif person["ORCID"] != profile["orcid"]:
                    print(
                        "Inconsistent ORCIDS for ",
                        person["CL_PEOPLE_ID"],
                        person["ORCID"],
                        "CaltechAUTHORS",
                        profile["orcid"],
                    )

# TODO - port to python (a sketch of the frame setup follows below)
# Run on command line
# dataset frame -all imported.ds gsheet-sync ._Key .ORCID
# dataset frame-labels imported.ds gsheet-sync "CL_PEOPLE_ID" "ORCID"
# dataset sync-send imported.ds gsheet-sync 1ZI3-XvQ_3rLcKrF-4FBa2tEInIdQfOnGJ9L_NmhmoGs CaltechPEOPLE
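# A partial Python port of the TODO above (a sketch only). frame_create()
# is called the same way as in test_issue12 elsewhere in this repository;
# the sync-send step against the Google Sheet is left to the dataset
# command line tool shown in the comments above.
f_name = "gsheet-sync"
if dataset.has_frame(import_coll, f_name):
    dataset.delete_frame(import_coll, f_name)
if not dataset.frame_create(import_coll, f_name,
                            dataset.keys(import_coll),
                            ["._Key", ".ORCID"],
                            ["CL_PEOPLE_ID", "ORCID"]):
    print(dataset.error_message())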
    }
    #assert schema40.validate(metadata)
    #Debugging if this fails
    #v = schema40.validator.validate(metadata)
    #errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
    #for error in errors:
    #    print(error.message)
    xml = schema40.tostring(metadata)

    result = d.metadata_post(xml)
    identifier = result.split('(')[1].split(')')[0]
    d.doi_post(identifier, inputv['url'])
    print('Completed ' + identifier)
    inputv['doi'] = identifier
    err = dataset.update(collection, key, inputv)
    token = os.environ['MAILTOK']
    email = inputv['email']
    url = inputv['url']
    send_simple_message(token, email, identifier, url)
else:
    print("Web archiving is not complete for " + inputv['name'])

dot_exprs = [
    '.email', '.url', '.title', '.author', '.affiliation', '.orcid',
    '.license', '.prefix', '.archive_complete', '.doi'
]
column_names = [
    'email', 'url', 'title', 'author', 'affiliation', 'orcid', 'license',
def aggregate_usage(usage_collection, month_collection):
    keys = dataset.keys(usage_collection)
    keys.remove("end-date")
    for k in progressbar(keys):
        record, err = dataset.read(usage_collection, k)
        if err != "":
            print(err)
        use = {}
        views = {}
        for usage in record["performance"]:
            split = usage["period"].split("-")
            month = split[0] + "-" + split[1]
            for u in usage["instance"]:
                metric = u["metric-type"]
                if metric == "unique-dataset-requests":
                    if month in use:
                        use[month] += u["count"]
                    else:
                        use[month] = u["count"]
                if metric == "unique-dataset-investigations":
                    if month in views:
                        views[month] += u["count"]
                    else:
                        views[month] = u["count"]
        # Strip non-counter stuff
        record.pop("_Key")
        record.pop("grand-total-unique-requests")
        record.pop("grand-total-unique-investigations")
        # go across months
        for view in views:
            split = view.split("-")
            date_obj = datetime(int(split[0]), int(split[1]), 1)
            d_range = get_month_day_range(date_obj)
            performance = [
                {
                    "period": {
                        "begin-date": d_range[0].date().isoformat(),
                        "end-date": d_range[1].date().isoformat(),
                    },
                    "instance": [],
                }
            ]
            v = views[view]
            performance[0]["instance"].append(
                {
                    "count": v,
                    "metric-type": "unique-dataset-investigations",
                    "access-method": "regular",
                }
            )
            # Handle when we have both views and uses in a given month
            if view in use:
                u = use[view]
                performance[0]["instance"].append(
                    {
                        "count": u,
                        "metric-type": "unique-dataset-requests",
                        "access-method": "regular",
                    }
                )
            existing, err = dataset.read(month_collection, view)
            if err != "":
                print(err)
            record["performance"] = performance
            existing["report-datasets"].append(record)
            if not dataset.update(month_collection, view, existing):
                err = dataset.error_message()
                print(err)
        for use_date in use:
            # We only have use-only records left to handle
            if use_date not in views:
                u = use[use_date]
                split = use_date.split("-")
                date_obj = datetime(int(split[0]), int(split[1]), 1)
                d_range = get_month_day_range(date_obj)
                performance = [
                    {
                        "period": {
                            "begin-date": d_range[0].date().isoformat(),
                            "end-date": d_range[1].date().isoformat(),
                        },
                        "instance": [
                            {
                                "count": u,
                                "metric-type": "unique-dataset-requests",
                                "access-method": "regular",
                            }
                        ],
                    }
                ]
                existing, err = dataset.read(month_collection, use_date)
                if err != "":
                    print(err)
                record["performance"] = performance
                existing["report-datasets"].append(record)
                if not dataset.update(month_collection, use_date, existing):
                    err = dataset.error_message()
                    print(err)
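# aggregate_usage() above calls a get_month_day_range() helper that is not
# included here. A minimal sketch consistent with how it is used (return
# the first and last day of the month containing date_obj as datetimes):
import calendar
from datetime import datetime

def get_month_day_range(date_obj):
    first = date_obj.replace(day=1)
    last = date_obj.replace(day=calendar.monthrange(date_obj.year, date_obj.month)[1])
    return [first, last]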
def get_usage(usage_collection, mapping, token):
    """Collect usage into a usage object for items in CaltechDATA"""

    # Find time periods
    datev, err = dataset.read(usage_collection, "end-date")
    new_start = datetime.fromtimestamp(datev["end-date"])
    now = datetime.now().timestamp()

    # minutes in range
    minutes_diff = math.ceil(
        (datetime.fromtimestamp(now) - new_start).total_seconds() / 60.0
    )

    # Get number of visitors since last harvest
    stats_url_base = "https://stats.tind.io/index.php?module=API&method=Live.getCounters&idSite=1161&format=JSON"
    token_s = "&token_auth=" + token
    stats_url = f"{stats_url_base}{token_s}&lastMinutes={minutes_diff}"
    response = requests.get(stats_url)
    if response.status_code != 200:
        print(response.text)
        print(stats_url)
    visitors = response.json()[0]["visits"]
    print(visitors)

    visit_url_base = "https://stats.tind.io/index.php?module=API&method=Live.getLastVisitsDetails&idSite=1161&format=json&filter_limit=1000"

    print("Getting usage")
    usage = []
    # We will page through visitors in chunks of 1000
    chunks = math.ceil(int(visitors) / 1000)
    if chunks > 1:
        url = visit_url_base + token_s + "&filter_limit=1000"
        process_visits(url, mapping)
        for c in progressbar(range(chunks)):
            url = f"{visit_url_base}{token_s}&filter_limit=1000&filter_offset={c*1000}"
            usage += process_visits(url, mapping)
    else:
        url = f"{visit_url_base}{token_s}&filter_limit={visitors}"
        usage = process_visits(url, mapping)

    print("Writing usage")
    for use in progressbar(usage):
        date = use["date"]
        if "downloads" in use and "views" in use:
            records = use["views"].union(use["downloads"])
        elif "views" in use:
            records = use["views"]
        else:
            records = use["downloads"]
        for rec in records:
            data, err = dataset.read(usage_collection, rec)
            if err == "":
                # We only track usage from live records
                instance = {"instance": [], "period": date}
                if "views" in use:
                    if rec in use["views"]:
                        instance["instance"].append(
                            {
                                "access-method": "regular",
                                "count": 1,
                                "metric-type": "unique-dataset-investigations",
                            }
                        )
                        # print(data,rec)
                        data["grand-total-unique-investigations"] += 1
                if "downloads" in use:
                    if rec in use["downloads"]:
                        instance["instance"].append(
                            {
                                "access-method": "regular",
                                "count": 1,
                                "metric-type": "unique-dataset-requests",
                            }
                        )
                        data["grand-total-unique-requests"] += 1
                data["performance"].append(instance)
                dataset.update(usage_collection, rec, data)
    dataset.update(usage_collection, "end-date", {"end-date": now})
def get_wos_refs(new=True):
    # New=True will download everything from scratch and delete any existing records
    collection = "all_wos.ds"

    if new == True:
        if os.path.exists(collection) == True:
            shutil.rmtree(collection)

    if os.path.isdir(collection) == False:
        ok = dataset.init(collection)
        if ok == False:
            print("Dataset failed to init collection")
            exit()

    # Get access token from WOS, set as an environment variable with source token.bash
    token = os.environ["WOSTOK"]

    headers = {"X-ApiKey": token, "Content-type": "application/json"}

    # Run query to get scope of records
    base_url = "https://api.clarivate.com/api/wos/?databaseId=WOK"

    collected = dataset.has_key(collection, "captured")

    if collected == True:
        date = dataset.read(collection, "captured")
        date = date[0]["captured"]
        date = datetime.fromisoformat(date)
        current = datetime.today()
        diff = current - date
        base_url = base_url + "&loadTimeSpan=" + str(diff.days) + "D"

    date = datetime.today().isoformat()
    record = {"captured": date}
    if dataset.has_key(collection, "captured"):
        err = dataset.update(collection, "captured", record)
        if err != "":
            print(f"Unexpected error on update: {err}")
    else:
        err = dataset.create(collection, "captured", record)
        if err != "":
            print(f"Unexpected error on create: {err}")

    query = "OG=(California Institute of Technology)"
    query = urllib.parse.quote_plus(query)
    url = base_url + "&usrQuery=" + query + "&count=100&firstRecord=1"

    response = requests.get(url, headers=headers)
    response = response.json()
    record_count = response["QueryResult"]["RecordsFound"]
    print(record_count, " Records from WOS")
    query_id = response["QueryResult"]["QueryID"]
    try:
        records = response["Data"]["Records"]["records"]["REC"]
    except:
        print(response)
    write_records(records, collection)

    # We have saved the first 100 records
    record_start = 101
    record_count = record_count - 100

    query_url = "https://api.clarivate.com/api/wos/query/"

    while record_count > 0:
        print(record_count)
        print(len(records), "records")
        if record_count > 100:
            url = (
                query_url
                + str(query_id)
                + "?count=100&firstRecord="
                + str(record_start)
            )
            response = requests.get(url, headers=headers)
            response = response.json()
            try:
                records = response["Records"]["records"]["REC"]
            except:
                print(response)
            write_records(records, collection)
            record_start = record_start + 100
            record_count = record_count - 100
        else:
            url = (
                query_url
                + str(query_id)
                + "?count="
                + str(record_count)
                + "&firstRecord="
                + str(record_start)
            )
            response = requests.get(url, headers=headers)
            response = response.json()
            records = response["Records"]["records"]["REC"]
            write_records(records, collection)
            record_count = 0

    print("Downloaded all records ")
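# get_wos_refs() above calls a write_records() helper that is not included
# in this file. A rough sketch, assuming each Web of Science record carries
# a "UID" field that can serve as the dataset key:
def write_records(records, collection):
    for rec in records:
        key = rec["UID"]  # assumed identifier field
        if dataset.has_key(collection, key):
            if not dataset.update(collection, key, rec):
                print(f"Unexpected error on update: {dataset.error_message()}")
        else:
            if not dataset.create(collection, key, rec):
                print(f"Unexpected error on create: {dataset.error_message()}")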
def test_attachments(t, collection_name):
    t.print("Testing attach, attachments, detach and prune")
    # Generate two files to attach.
    with open('a1.txt', 'w') as text_file:
        text_file.write('This is file a1')
    with open('a2.txt', 'w') as text_file:
        text_file.write('This is file a2')
    filenames = ['a1.txt', 'a2.txt']

    if dataset.status(collection_name) == False:
        t.error("Failed,", collection_name, "missing")
        return
    keys = dataset.keys(collection_name)
    if len(keys) < 1:
        t.error("Failed,", collection_name, "should have keys")
        return
    key = keys[0]
    if dataset.attach(collection_name, key, filenames) == False:
        err = dataset.error_message()
        t.error("Failed, to attach files for", collection_name, key, filenames, ', ', err)
        return
    l = dataset.attachments(collection_name, key)
    if len(l) != 2:
        t.error("Failed, expected two attachments for", collection_name, key, "got", l)
        return
    # Check that attachments aren't impacted by update
    if dataset.update(collection_name, key, {"testing": "update"}) == False:
        err = dataset.error_message()
        t.error("Failed, to update record", collection_name, key, err)
        return
    l = dataset.attachments(collection_name, key)
    if len(l) != 2:
        t.error("Failed, expected two attachments after update for", collection_name, key, "got", l)
        return
    if os.path.exists(filenames[0]):
        os.remove(filenames[0])
    if os.path.exists(filenames[1]):
        os.remove(filenames[1])

    # First try detaching one file.
    if dataset.detach(collection_name, key, [filenames[1]]) == False:
        err = dataset.error_message()
        t.error("Failed, expected True for", collection_name, key, filenames[1], ', ', err)
    if os.path.exists(filenames[1]):
        os.remove(filenames[1])
    else:
        t.error("Failed to detach", filenames[1], "from", collection_name, key)

    # Test explicit filenames detach
    if dataset.detach(collection_name, key, filenames) == False:
        err = dataset.error_message()
        t.error("Failed, expected True for", collection_name, key, filenames, ', ', err)

    for fname in filenames:
        if os.path.exists(fname):
            os.remove(fname)
        else:
            t.error("Failed, expected", fname, "to be detached from", collection_name, key)

    # Test detaching all files
    if dataset.detach(collection_name, key, []) == False:
        err = dataset.error_message()
        t.error("Failed, expected True for (detaching all)", collection_name, key, ', ', err)
    for fname in filenames:
        if os.path.exists(fname):
            os.remove(fname)
        else:
            t.error("Failed, expected", fname, "for detaching all from", collection_name, key)

    if dataset.prune(collection_name, key, [filenames[0]]) == False:
        err = dataset.error_message()
        t.error("Failed, expected True for prune", collection_name, key, [filenames[0]], ', ', err)
    l = dataset.attachments(collection_name, key)
    if len(l) != 1:
        t.error("Failed, expected one file after prune for", collection_name, key, [filenames[0]], "got", l)

    if dataset.prune(collection_name, key, []) == False:
        err = dataset.error_message()
        t.error("Failed, expected True for prune (all)", collection_name, key, ', ', err)
    l = dataset.attachments(collection_name, key)
    if len(l) != 0:
        t.error("Failed, expected zero files after prune for", collection_name, key, "got", l)
def test_basic(t, collection_name):
    '''test_basic(collection_name) runs tests on basic CRUD ops'''
    # Setup a test record
    key = "2488"
    value = {
        "title": "Twenty Thousand Leagues Under the Seas: An Underwater Tour of the World",
        "formats": ["epub", "kindle", "plain text"],
        "authors": [{"given": "Jules", "family": "Verne"}],
        "url": "https://www.gutenberg.org/ebooks/2488"
    }

    # We should have an empty collection, we will create our test record.
    if dataset.create(collection_name, key, value) == False:
        err = dataset.error_message()
        t.error(f'create({collection_name}, {key}, {value}) failed, {err}')
        return

    # Check to see that we have only one record
    key_count = dataset.count(collection_name)
    if key_count != 1:
        t.error(f"Failed, expected count to be 1, got {key_count}")

    # Do a minimal test to see if the record looks like it has content
    keyList = dataset.keys(collection_name)
    rec, err = dataset.read(collection_name, key)
    if err != "":
        t.error(f"Unexpected error for {key} in {collection_name}, {err}")
    for k, v in value.items():
        if not isinstance(v, list):
            if k in rec and rec[k] == v:
                t.print("OK, found", k, " -> ", v)
            else:
                t.error(f"expected {rec[k]} got {v}")
        else:
            if k == "formats" or k == "authors":
                t.print("OK, expected lists for", k, " -> ", v)
            else:
                t.error(f"Failed, expected {k} with list v, got {v}")

    # Test updating record
    value["verified"] = True
    if dataset.update(collection_name, key, value) == False:
        err = dataset.error_message()
        t.error(f"update({collection_name}, {key}, {value}) failed, {err}")
    rec, err = dataset.read(collection_name, key)
    if err != "":
        t.error(f"Unexpected error for {key} in {collection_name}, {err}")
    for k, v in value.items():
        if not isinstance(v, list):
            if k in rec and rec[k] == v:
                t.print("OK, found", k, " -> ", v)
            else:
                t.error(f"expected {rec[k]} got {v} for key {k}")
        else:
            if k == "formats" or k == "authors":
                t.print("OK, expected lists for", k, " -> ", v)
            else:
                t.error(f"Failed, expected {k} with a list for v, got {v}")

    # Test path to record
    expected_s = "/".join(
        [collection_name, "pairtree", "24", "88", (key + ".json")])
    expected_l = len(expected_s)
    p = dataset.path(collection_name, key)
    if len(p) != expected_l:
        t.error("Failed, expected length", expected_l, "got", len(p))
    if p != expected_s:
        t.error("Failed, expected", expected_s, "got", p)

    # Test listing records
    l = dataset.list(collection_name, [key])
    if len(l) != 1:
        t.error(
            f"list({collection_name}, [{key}]) failed, list should return an array of one record, got",
            l)
        return

    # test deleting a record
    if dataset.delete(collection_name, key) == False:
        err = dataset.error_message()
        t.error("Failed, could not delete record", key, ", ", err)