def generate_base_submission():
    """Write the base deploy-testing submission to disk.

    Builds a minimal, valid MDF Connect test submission and saves it as
    JSON to TEST_SUBMISSIONS_DIR/base_submission.json.

    Returns:
        dict: {"success": True} once the file has been written.
    """
    out_path = os.path.join(TEST_SUBMISSIONS_DIR, "base_submission.json")
    client = MDFConnectClient()  # service_instance is irrelevant
    client.create_dc_block(title="Base Deploy Testing Dataset",
                           authors="jgaff",
                           affiliations="UChicago")
    client.add_data_source(DATA_SOURCES)
    client.set_test(True)
    client.update = UPDATE_FLAG
    submission = client.get_submission()
    with open(out_path, 'w') as out_file:
        json.dump(submission, out_file)
    return {"success": True}
def mdf_check_status(source_id):
    """Check status of MDF Connect submission.

    Arguments:
        source_id (str): The source_id of the dataset to check.
                The source_id is returned from the submission call.
    """
    from mdf_connect_client import MDFConnectClient

    client = MDFConnectClient()
    result = client.check_status(source_id, raw=True)
    # Surface service-side errors to the caller immediately.
    if not result["success"]:
        raise RuntimeError(result["error"])
    message = result["status"]["status_message"]
    print(message)
    return message
# NOTE(review): this line begins mid-function — the enclosing definition's
# start is outside this view, so the code is left byte-identical. The span
# mixes the tail of a truncated helper (appears to pick a carbon source from
# sample.growing_steps — confirm against the full file) with top-level script
# setup: an MDF Connect client (dev instance, test mode), a Box
# DevelopmentClient (prompts for a developer token), two hardcoded Box folder
# downloads, and two pandas CSV loads from ../data.
sample.growing_steps)))) if len(list_of_sources): return list_of_sources[0] else: return "" return list( filter( None, list(map(lambda step: step.carbon_source, sample.growing_steps))))[0] else: return "" # Get a handle to MDF Client Connect mdfcc = MDFConnectClient(test=True, service_instance="dev") # This will prompt for a developer Token from Box # See https://developer.box.com/docs/authenticate-with-developer-token client = DevelopmentClient() # Download a hardcoded, specific folder from box for now mdf_folder = client.folder("50410951565").get() staging_folder = client.folder("59557678760").get() filepath = "../data" var_map = pd.read_csv(os.path.join(filepath, 'varmap2.csv')).to_dict() data = pd.read_csv(os.path.join(filepath, 'recipe_2018_11_26.csv')).iloc[:-1, :]
def submit_test_submissions(service_instance, submission_dir=TEST_SUBMISSIONS_DIR,
                            verbose=True):
    """Submit the test submissions to Connect.

    Arguments:
        service_instance (str): The instance of Connect to test (dev or prod).
        submission_dir (str): Path to a dir containing the files with submissions.
                Default TEST_SUBMISSIONS_DIR.
        verbose (bool): Should intermediate status messages be printed?
                Default True.

    Returns:
        dict: The testing results.
            success (bool): True iff all the tests succeeded.
            passed (list of str): The source_ids that passed testing.
            failed (list of dicts): The testing failures, with details.

    Note:
        If the source_name of a submission contains a keyword flag, it will be
        processed differently. Keyword flags include:
            "fail": The test is a success if the submission fails, and vice-versa.
    """
    mdfcc = MDFConnectClient(service_instance=service_instance)
    source_ids = []
    successes = []
    failures = []
    curated = []
    # Phase 1: submit every file in submission_dir; a failed submit is
    # recorded as a failure immediately and not polled later.
    for file_name in os.listdir(submission_dir):
        path = os.path.join(submission_dir, file_name)
        if verbose:
            print("Submitting", file_name)
        with open(path) as f:
            submission = json.load(f)
        submit_res = mdfcc.submit_dataset(submission=submission)
        if not submit_res["success"]:
            if verbose:
                print("Error:", submit_res["error"])
            failures.append(submit_res)
            continue
        source_ids.append(submit_res["source_id"])
        if verbose:
            print(submit_res["source_id"], "submitted")
    if verbose:
        print("All tests submitted.")
    # Check statuses until finished
    # Phase 2: poll each source_id; unfinished ones are re-queued at the back.
    while len(source_ids) > 0:
        sid = source_ids.pop(0)
        # If submission requires curation, curate it
        # Don't double-curate - this can cause issues
        if (mdfcc.get_curation_task(sid, raw=True)["success"] and sid not in curated):
            # Reject "fail" submissions
            if "fail" in sid:
                curation_res = mdfcc.reject_curation_submission(
                    sid, reason="Intentional failure", prompt=False, raw=True)
            # Accept all other submissions
            else:
                curation_res = mdfcc.accept_curation_submission(
                    sid, reason="Testing curation", prompt=False, raw=True)
            if not curation_res["success"]:
                if verbose:
                    print("Could not curate curation submission {}: {}".format(
                        sid, curation_res["error"]))
                failures.append(deepcopy(curation_res))
                # Skip status check - test has failed
                continue
            else:
                curated.append(sid)
        # Now check the current status
        status = mdfcc.check_status(sid, raw=True)
        if not status["success"]:
            if verbose:
                print("Could not fetch status for", sid, status["error"])
            # Re-queue source_id
            # Risk of infinite loop here if service is down; recommend using 'verbose' to catch
            source_ids.append(sid)
            continue
        # validate_status interprets the raw status; "fail"-flagged
        # submissions are expected to invert pass/fail semantics (see Note).
        status_res = validate_status(status["status"])
        # If failed or succeeded, put result in appropriate list
        if status_res["result"] == "failed":
            failures.append(deepcopy(status_res))
            if verbose:
                print(sid, "failed")
        elif status_res["result"] == "success":
            successes.append(sid)
            if verbose:
                print(sid, "passed")
        # Otherwise, is not finished and should be re-checked
        else:
            source_ids.append(sid)
    return {
        "success": (len(failures) == 0),
        "passed": successes,
        "failed": failures
    }
def generate_base_failure():
    """Write the base deploy-failure test submission to disk.

    The generated submission intentionally writes to the "nanomfg" project
    block while not belonging to the nanomfg organization, so Connect is
    expected to reject it. Saved to TEST_SUBMISSIONS_DIR/base_failure.json.

    Returns:
        dict: {"success": True} once the file has been written.
    """
    out_path = os.path.join(TEST_SUBMISSIONS_DIR, "base_failure.json")
    client = MDFConnectClient()  # service_instance is irrelevant
    client.create_dc_block(title="Base Deploy Fail Dataset",
                           authors="jgaff",
                           affiliations="UChicago")
    client.add_data_source(DATA_SOURCES)
    # Failure point: Submission not in nanomfg organization,
    # but attempting to write to nanomfg project block
    client.set_project_block("nanomfg", {"catalyst": "bad data testing"})
    client.set_test(True)
    client.update = UPDATE_FLAG
    submission = client.get_submission()
    with open(out_path, 'w') as out_file:
        json.dump(submission, out_file)
    return {"success": True}
"""
Little script to force MDF to authenticate with Globus.
This can be used to generate a ~/.mdf/credentials folder on demand
"""
from mdf_connect_client import MDFConnectClient

# Instantiating the client is the whole point: construction triggers the
# Globus authentication flow, which caches credentials under ~/.mdf/.
mdfcc = MDFConnectClient(test=True, service_instance="prod")
def __init__(self):
    # Hold a single MDF Connect client for the lifetime of this object.
    # NOTE(review): test=True with service_instance="prod" — test mode
    # against the production service instance; confirm this combination
    # is intended rather than service_instance="dev".
    self.mdfcc = MDFConnectClient(test=True, service_instance="prod")
class MDFAdaptor:
    """Adaptor that submits graphene-synthesis recipes and Raman analyses
    to MDF Connect as test-mode datasets.
    """

    def __init__(self):
        # Single client reused across submissions; reset_submission() is
        # called after each submit so it can be reused safely.
        # NOTE(review): test=True against service_instance="prod" — confirm.
        self.mdfcc = MDFConnectClient(test=True, service_instance="prod")

    def _submit(self, submission, exc_label, success_label):
        """Submit `submission` via the shared client and return its source_id.

        Shared tail of both upload methods. Prints diagnostics, resets the
        client's staged submission on both success and failure, and wraps any
        error in MDFException. `exc_label` and `success_label` customize the
        printed messages.

        Raises:
            MDFException: If submission raises or reports failure.
        """
        print("\n\n\n\n------>", submission)
        try:
            mdf_result = self.mdfcc.submit_dataset(submission=submission)
        except Exception as e:
            print(exc_label, str(e))
            raise MDFException(e)
        if not mdf_result["success"]:
            # Clear the staged submission so the client can be reused.
            self.mdfcc.reset_submission()
            print("\n\n\n--->Error-----> " + mdf_result["error"])
            raise MDFException(mdf_result["error"])
        print(success_label + str(mdf_result))
        self.mdfcc.reset_submission()
        return mdf_result["source_id"]

    def upload_recipe(self, recipe, box_file):
        """Build and submit an MDF dataset for a synthesis recipe.

        Arguments:
            recipe: Recipe object; this method reads .authors (dicts with
                last_name/first_name/institution), .experiment_year,
                .catalyst, .max_temp(), .carbon_source(), .base_pressure.
            box_file: Box file handle whose open shared link is used as the
                dataset's data source.

        Returns:
            str: The source_id assigned by MDF Connect.

        Raises:
            MDFException: If submission fails.
        """
        self.mdfcc.create_dc_block(
            title="Graphene Synthesis Sample " + "TBD",
            authors=[
                "%s, %s" % (auth["last_name"], auth["first_name"])
                for auth in recipe.authors
            ],
            affiliations=[auth["institution"] for auth in recipe.authors],
            publication_year=recipe.experiment_year,
        )
        self.mdfcc.add_data_source(
            box_file.get_shared_link_download_url(access="open"))
        # Don't publish specific recipes. Later on, we will bundle datasets and
        # and publish an omnibus dataset
        # mdfcc.add_service("globus_publish")
        # Random source name keeps repeated test submissions distinct.
        self.mdfcc.set_source_name(str(uuid.uuid4()))
        submission = self.mdfcc.get_submission()
        submission["projects"] = {}
        submission["projects"]["nanomfg"] = {
            "catalyst": recipe.catalyst,
            "max_temperature": recipe.max_temp(),
            "carbon_source": recipe.carbon_source(),
            "base_pressure": recipe.base_pressure
            # "sample_surface_area": recipe.sample_surface_area,
            # "sample_thickness": recipe.thickness,
            # "orientation": "",
            # "grain_size": ""
        }
        return self._submit(submission,
                            "Exception submitting dataset to mdf ",
                            "Submitted to MDF -----> ")

    def upload_raman_analysis(self, recipe, recipe_dataset_id, raman_set,
                              raman_box_file):
        """Build and submit an MDF dataset for a Raman analysis, linked to a
        previously submitted recipe dataset.

        Arguments:
            recipe: Recipe object (authors and experiment_year are read).
            recipe_dataset_id: DOI/identifier of the related recipe dataset.
            raman_set (dict): Raman fit results; keys copied verbatim into
                the nanomfg project block.
            raman_box_file: Box file handle for the Raman data.

        Returns:
            str: The source_id assigned by MDF Connect.

        Raises:
            MDFException: If submission fails.
        """
        self.mdfcc.create_dc_block(
            title="Graphene Synthesis Raman Analysis",
            authors=[
                "%s, %s" % (auth["last_name"], auth["first_name"])
                for auth in recipe.authors
            ],
            affiliations=[auth["institution"] for auth in recipe.authors],
            publication_year=recipe.experiment_year,
            related_dois=recipe_dataset_id,
        )
        self.mdfcc.add_data_source(
            raman_box_file.get_shared_link_download_url(access="open"))
        self.mdfcc.set_source_name(str(uuid.uuid4()))
        submission = self.mdfcc.get_submission()
        submission["projects"] = {}
        submission["projects"]["nanomfg"] = {
            "d_to_g": raman_set["d_to_g"],
            "gp_to_g": raman_set["gp_to_g"],
            "d_peak_shift": raman_set["d_peak_shift"],
            "d_peak_amplitude": raman_set["d_peak_amplitude"],
            "d_fwhm": raman_set["d_fwhm"],
            "g_peak_shift": raman_set["g_peak_shift"],
            "g_peak_amplitude": raman_set["g_peak_amplitude"],
            "g_fwhm": raman_set["g_fwhm"],
            "g_prime_peak_shift": raman_set["g_prime_peak_shift"],
            "g_prime_peak_amplitude": raman_set["g_prime_peak_amplitude"],
            "g_prime_fwhm": raman_set["g_prime_fwhm"],
        }
        return self._submit(
            submission,
            "Exception submitting raman analysis dataset to mdf ",
            "Submitted raman analysis to MDF -----> ")
def mdf_submit(local_json_path, globus_uri, test=True, with_doi=True,
               update=False):
    """Submit dataset to MDF Connect.

    Arguments:
        local_json_path (str): The path to the local JSON metadata file.
        globus_uri (str): The URI to the Globus Endpoint and path, in the form:
                "globus://[endpoint id]/[path to data directory]"
        test (bool): Is this a test submission (test submissions generate test
                DOIs and populate the Search index "mdf-test" instead of "mdf")?
                Default True.
        with_doi (bool): Should a DOI be minted? (Includes test DOI.)
                Default True.
        update (bool): Has this submission been made before? If so, an update
                will be made to the previous submission. Test submissions and
                non-test submissions are separate.
    """
    import json
    import os
    from mdf_connect_client import MDFConnectClient

    # Map raw "custom" metadata fields onto the XPCS schema fields.
    field_mapping = {
        "custom.dynamic_mean_window_size": "xpcs.dynamic_mean_window_size",
        "custom.lld": "xpcs.lld",
        "custom.sigma": "xpcs.sigma",
        "custom.snophi": "xpcs.snophi",
        "custom.snoq": "xpcs.snoq"
    }

    client = MDFConnectClient()
    with open(local_json_path) as metadata_file:
        metadata = json.load(metadata_file)

    # DC block: title from the file name, plus authors/publisher/year/subjects
    # pulled from the DataCite-style metadata file.
    client.create_dc_block(
        title=os.path.basename(local_json_path).replace(".json", ""),
        authors=[creator["creatorName"] for creator in metadata["creators"]],
        publisher=metadata.get("publisher"),
        publication_year=metadata.get("publication_year"),
        subjects=[subject["subject"]
                  for subject in metadata.get("subjects", [])])
    client.add_data_source(globus_uri)
    client.add_index("json", field_mapping)
    client.set_test(test)
    client.add_organization("XPCS 8-ID")
    # Group files in the same directory into a single record.
    client.set_conversion_config({"group_by_dir": True})
    if with_doi:
        client.add_service("mdf_publish")

    sub_res = client.submit_dataset(update=update)
    if not sub_res["success"]:
        raise RuntimeError(sub_res["error"])
    message = "Submission '{}' started".format(sub_res["source_id"])
    print(message)
    return message