def main():
    """
    Run the ``FixAPI`` check on every record and clean up where indicated.

    The data directory and the dry-run flag are obtained from
    ``process_input()``.  For each record the checker returns a
    ``(flag, msg)`` pair; the message is always printed, then:

    * ``flag is True``: ``cleanup()`` is called, the resulting API is
      printed and, unless this is a dry run, the record is written back
      to the path it was loaded from.
    * ``flag is False``: the product type and the densities of the first
      sub-sample are printed.
    * any other flag: nothing further is done.
    """
    base_dir, dry_run = process_input()

    for record, path in get_all_records(base_dir):
        print("\n\n******************\n")
        print("processing:", record.oil_id, record.metadata.name)

        api_fixer = FixAPI(record)
        outcome, message = api_fixer.check()

        # every outcome starts by reporting the checker's message
        print(message)

        if outcome is False:
            print("It's a:", record.metadata.product_type)
            print("Densities are:",
                  record.sub_samples[0].physical_properties.densities)
            continue

        if outcome is not True:
            # neither True nor False: the message is all there is to report
            continue

        print("Cleaning up!")
        api_fixer.cleanup()
        print("API is now:", record.metadata.API)

        if dry_run:
            print("Nothing saved")
        else:
            print("Saving out:", path)
            record.to_file(path)
def main():
    """
    Append ``NEW_COMMENT`` to the comments of the matching records.

    A record matches when its reference year is one of the listed years
    and the reference text contains "Jokuty".  Records whose comments
    already contain ``NEW_COMMENT`` are left alone.  Each changed record
    is written back to its file unless this is a dry run, and the number
    of changed records is printed at the end.
    """
    base_dir, dry_run = ads.process_input(USAGE)

    # NOTE(review): 1990 used to be listed twice in this set -- confirm
    # that no other year was intended.
    target_years = {1990, 1992, 1996, 1999}

    count = 0
    for rec, pth in ads.get_all_records(base_dir):
        reference = rec.metadata.reference

        if not (reference.year in target_years
                and "Jokuty" in reference.reference):
            continue

        if NEW_COMMENT in rec.metadata.comments:
            # the note is already there
            continue

        count += 1
        existing = rec.metadata.comments
        if existing:
            # keep what is there, separated from the note by a blank line
            rec.metadata.comments = "\n\n".join((existing, NEW_COMMENT))
        else:
            rec.metadata.comments = NEW_COMMENT

        print("Adding note to:", rec.oil_id)
        print(reference.reference)

        if dry_run:
            print("Nothing saved")
        else:
            print("Saving out:", pth)
            rec.to_file(pth)

    print(f"{count} records changed")
def test_does_not_break_test_records():
    """
    Smoke test: run validation on every record in the test data.

    No particular validation result is checked -- the test passes as
    long as ``validate()`` does not raise for any of the records.
    """
    for record, _ in get_all_records(TEST_DATA_DIR):
        record.validate()
def write_reports(base_dir, save):
    """
    Collect the validation status of every record and write two reports.

    :param base_dir: directory handed to ``get_all_records()``; it is
                     also passed to ``write_header()``
    :param save: if true, every record is written back to its own file
                 as JSON (indent of 4) after its validation was reset

    ``validation_by_record.rst`` and ``validation_by_error.rst`` are
    written to the current working directory.  Each report has a main
    part and a second part introduced by ``write_header_rev()``:

    * by record: the second part holds records whose review status is
      "review complete" or for which ``is_only_ignored()`` is true.
    * by error: the second part holds issues of records whose review
      status is "review complete" or whose error code is in
      ``ERRORS_TO_IGNORE``.
    """
    by_record, by_record_rev = {}, {}
    by_error, by_error_rev = {}, {}

    # validate all the records:
    for oil, pth in get_all_records(base_dir):
        print("\n\n******************\n")
        print(f"processing: {oil.oil_id}: {oil.metadata.name}")

        oil.reset_validation()

        # a dict of {error_code: messages} is easier to work with
        status = unpack_status(oil.status)

        if status:
            print(status)

            reviewed = (oil.review_status.status.lower()
                        == "review complete")

            if reviewed or is_only_ignored(status):
                record_target = by_record_rev
            else:
                record_target = by_record
            record_target[oil.oil_id] = (oil.metadata.name, oil.status)

            for error_code, msgs in status.items():
                issues = "\n".join(f"\n``{oil.oil_id}`` "
                                   f"-- {oil.metadata.name}:\n\n {msg}\n"
                                   for msg in msgs)

                if reviewed or error_code in ERRORS_TO_IGNORE:
                    error_target = by_error_rev
                else:
                    error_target = by_error
                error_target.setdefault(error_code, []).append(issues)

        if save:
            with open(pth, 'w', encoding='utf-8') as datafile:
                json.dump(oil.py_json(), datafile, indent=4)

    # write out the validation by record
    with open("validation_by_record.rst", 'w', encoding="utf-8") as report:
        write_header(report, base_dir)
        write_by_record(report, by_record)
        write_header_rev(report)
        write_by_record(report, by_record_rev)

    # write out the validation by error
    with open("validation_by_error.rst", 'w', encoding="utf-8") as report:
        write_header(report, base_dir)
        write_by_error(report, by_error)
        write_header_rev(report)
        write_by_error(report, by_error_rev)
def main():
    """
    Apply ``move_ift_note()`` to the interfacial tension data of all records.

    For every sub-sample of every record, ``move_ift_note()`` is called
    on the water, air and seawater interfacial tension properties, in
    that order.  Every record is then written back to its file unless
    this is a dry run.
    """
    base_dir, dry_run = process_input()

    ift_attributes = ("interfacial_tension_water",
                      "interfacial_tension_air",
                      "interfacial_tension_seawater")

    for record, path in get_all_records(base_dir):
        print("\n\n******************\n")
        print("processing:", record.oil_id, record.metadata.name)

        for sub_sample in record.sub_samples:
            properties = sub_sample.physical_properties
            for attr_name in ift_attributes:
                move_ift_note(getattr(properties, attr_name))

        if dry_run:
            print("Nothing saved")
        else:
            print("Saving out:", path)
            record.to_file(path)
def run_through():
    """
    Replace one location name with another in all the records.

    Records whose location, lower-cased, equals ``orig_location`` get
    ``new_location`` as their location.  Changed records are written
    back to their file unless this is a dry run; other records are not
    touched.
    """
    base_dir, dry_run = process_input(USAGE=USAGE)

    for oil, pth in get_all_records(base_dir):
        oil_id = oil.oil_id
        oil_name = oil.metadata.name

        if oil.metadata.location.lower() != orig_location:
            # not the location we are looking for
            continue

        print("\nProcessing:", oil_id, oil_name)
        print("changing location to:", new_location)
        oil.metadata.location = new_location

        if dry_run:
            print("Dry Run: Nothing saved")
        else:
            print("Saving out:", pth)
            oil.to_file(pth)
def main():
    """
    Dump the distillation method and fraction recovered of all records.

    Writes ``adios_distillation_data.csv`` (tab separated, despite the
    name) to the current working directory, with one row per record:
    oil ID, name, reference text, and the distillation method and
    fraction recovered of the first sub-sample.

    The records themselves are never modified, so the dry-run flag
    returned by ``process_input()`` is not used.
    """
    base_dir, _dry_run = ads.process_input(USAGE)

    out_name = "adios_distillation_data.csv"
    print("writing:", out_name)

    # The context manager makes sure the file is flushed and closed, even
    # if a record raises part way through.  The encoding is explicit so
    # that non-ASCII names do not depend on the platform default.
    with open(out_name, 'w', encoding="utf-8") as outfile:
        outfile.write(
            "Oil ID\tName\tReference\tdistillation_method\t"
            "fraction_recovered\n")

        for rec, _pth in ads.get_all_records(base_dir):
            ID = rec.oil_id
            name = rec.metadata.name
            reference = rec.metadata.reference.reference

            # only the first sub-sample is reported
            dist_data = rec.sub_samples[0].distillation_data
            dist_method = dist_data.method
            frac_recov = dist_data.fraction_recovered

            outfile.write(
                f'{ID} \t"{name}" \t"{reference}" '
                f'\t{dist_method} \t{frac_recov}\n'
            )
def add_the_method():
    """
    Set the method on the CCME data of the first sub-sample of all records.

    Command line: the directory with the records, optionally with
    ``dry_run`` anywhere among the arguments.  Without a directory the
    usage message is printed and the script exits with status 1.

    For each record:

    * if the ``value`` of F1, F2, F3 and F4 is ``None`` for all four,
      the CCME of the first sub-sample is replaced with an empty
      ``ccme.CCME()``;
    * otherwise its ``method`` is set to ``"ESTS 5.03/x.x/M"``.

    Every record is then written back to its file unless this is a dry
    run.
    """
    # "dry_run" is removed from argv so that the directory is always the
    # first remaining argument
    try:
        sys.argv.remove("dry_run")
        dry_run = True
    except ValueError:
        dry_run = False

    try:
        base_dir = sys.argv[1]
    except IndexError:
        print(USAGE)
        # a missing argument is a usage error: report failure to the shell
        sys.exit(1)

    fraction_names = ("F1", "F2", "F3", "F4")

    for oil, pth in get_all_records(base_dir):
        first_sample = oil.sub_samples[0]
        CCME = first_sample.CCME

        print("\nFor Oil:", oil.oil_id, oil.metadata.name)
        print("CCME:", CCME)
        print()

        if all(getattr(CCME, frac).value is None for frac in fraction_names):
            # put in an empty CCME
            print("replacing CCME with an empty one")
            first_sample.CCME = ccme.CCME()
        else:
            # Add the method
            print("adding the method")
            first_sample.CCME.method = "ESTS 5.03/x.x/M"

        if not dry_run:
            print("Saving out:", pth)
            oil.to_file(pth)
        else:
            print("Dry Run: Nothing saved")
def add_the_method():
    """
    Set the method on the ESTS hydrocarbon fractions of all records.

    Command line: the directory with the records, optionally with
    ``dry_run`` anywhere among the arguments.  Without a directory the
    usage message is printed and the script exits with status 1.

    Only the first sub-sample of each record is looked at:

    * if ``aromatics``, ``GC_TPH`` and ``saturates`` are all empty,
      ``ESTS_hydrocarbon_fractions`` is set to ``None``;
    * otherwise its ``method`` is set to ``"ESTS 5.03/x.x/M"``.

    Every record is then written back to its file unless this is a dry
    run.
    """
    # "dry_run" is removed from argv so that the directory is always the
    # first remaining argument
    try:
        sys.argv.remove("dry_run")
        dry_run = True
    except ValueError:
        dry_run = False

    try:
        base_dir = sys.argv[1]
    except IndexError:
        print(USAGE)
        # a missing argument is a usage error: report failure to the shell
        sys.exit(1)

    for oil, pth in get_all_records(base_dir):
        first_sample = oil.sub_samples[0]
        fractions = first_sample.ESTS_hydrocarbon_fractions

        print("\nFor Oil:", oil.oil_id, oil.metadata.name)
        print("ESTS_HydroCarbonFractions:", fractions)
        print()

        has_data = (fractions.aromatics
                    or fractions.GC_TPH
                    or fractions.saturates)
        if not has_data:
            print("ESTS_hydrocarbon_fractions empty: removing")
            first_sample.ESTS_hydrocarbon_fractions = None
        else:
            print("adding the method")
            fractions.method = "ESTS 5.03/x.x/M"

        if not dry_run:
            print("Saving out:", pth)
            oil.to_file(pth)
        else:
            print("Dry Run: Nothing saved")
def add_the_labels():
    """
    Add the suggested labels to all records and report them in a CSV file.

    Command line: handled by ``process_input(USAGE)``, plus an optional
    ``replace`` argument.  With ``replace`` the suggested labels replace
    the ones already on the record; without it the existing labels are
    merged into the suggested ones.

    One row per record (ID, name, product type, labels) is written to
    ``labels.csv`` in the current working directory.  Unless this is a
    dry run, the labels are also stored on the record and the record is
    written back to its file.

    A failure on one record is reported and the run carries on with the
    next record.
    """
    # Look for "replace" -- it has to be gone from argv before
    # process_input() looks at the arguments
    try:
        sys.argv.remove("replace")
        replace = True
    except ValueError:
        replace = False

    base_dir, dry_run = process_input(USAGE)

    report_name = "labels.csv"
    with open(report_name, 'w') as outfile:
        outfile.write("ID, Name, Product Type, Labels\n")

        for oil, pth in get_all_records(base_dir):
            oil_id = oil.oil_id
            name = oil.metadata.name
            pt = oil.metadata.product_type

            print("\nFor Oil:", oil_id, name)
            try:
                labels = get_suggested_labels(oil)
                print(labels)

                if not replace:
                    labels.update(oil.metadata.labels)

                outfile.write(
                    f"{oil_id}, {name}, {pt}, {str(labels).strip('{}')}\n")

                if not dry_run:
                    print("Saving out:", pth)
                    oil.metadata.labels = list(labels)
                    oil.to_file(pth)
                else:
                    print("Nothing saved")
            except Exception as err:
                # Deliberately best-effort: one bad record must not stop
                # the run.  Only Exception is caught, so Ctrl-C and
                # sys.exit() still work, and the error is shown rather
                # than hidden.
                print("Something went wrong -- no labels")
                print("    the error was:", repr(err))

    print("output written to:", report_name)
def run_through():
    """
    Set the reference text of all records whose ID starts with "EC".

    Matching records get the Environment and Climate Change Canada
    reference below and are written back to their file unless this is a
    dry run.  Other records are not touched.
    """
    base_dir, dry_run = process_input(USAGE=USAGE)

    ecc_reference = (
        'Environment and Climate Change Canada, Environment '
        'Canada Crude Oil and Petroleum Product Database, '
        'Environment and Climate Change Canada, 2021.\n\n'
        'url: https://open.canada.ca/data/en/dataset/'
        '53c38f91-35c8-49a6-a437-b311703db8c5'
    )

    for oil, pth in get_all_records(base_dir):
        oil_id = oil.oil_id
        oil_name = oil.metadata.name

        if oil_id[:2] != 'EC':
            continue

        print("\nProcessing:", oil_id, oil_name)
        print("changing reference to:", ecc_reference)
        oil.metadata.reference.reference = ecc_reference

        if dry_run:
            print("Dry Run: Nothing saved")
        else:
            print("Saving out:", pth)
            oil.to_file(pth)
def add_them(data):
    """
    Add the alternate names from the spreadsheet data to the records.

    :param data: mapping of oil ID to a spreadsheet row; the name is read
                 from ``row[0]`` and the comma separated alternate names
                 from ``row[10]``

    Two reports are written to the current working directory:

    * ``missing_records.csv``: ID and name of every record that has no
      row in ``data``.
    * ``name_mismatch_records.csv``: ID, record name and spreadsheet name
      of every record whose name differs from the one in its row.

    When the names match and there are alternate names left after the
    clean-up, they are stored on the record, and the record is written
    back to its file unless this is a dry run.
    """
    # both reports are closed (and flushed) when the block is left, even
    # if a record raises part way through
    with open("missing_records.csv", 'w',
              encoding="utf-8") as missing, \
         open("name_mismatch_records.csv", 'w',
              encoding="utf-8") as name_mismatch:

        base_dir, dry_run = process_input(USAGE)

        for oil, pth in get_all_records(base_dir):
            ID = oil.oil_id
            try:
                row = data[ID]
            except KeyError:
                print(f"{ID} not in the spreadsheet")
                missing.write(",".join([ID, oil.metadata.name]))
                # the line end belongs to the file the row was written to
                missing.write("\n")
                continue

            name = row[0]
            print("Processing:", ID)

            if name != oil.metadata.name:
                print("Name doesn't match!", name, oil.metadata.name)
                name_mismatch.write(",".join([ID, oil.metadata.name, name]))
                name_mismatch.write("\n")
                continue

            candidates = [alt.strip() for alt in row[10].strip().split(",")]
            # Clean out the duplicates: drop anything that is contained
            # (ignoring case) in the primary name.  This also drops empty
            # entries, as "" is contained in every string.
            primary = name.lower()
            anames = [alt for alt in candidates if alt.lower() not in primary]

            if anames:
                oil.metadata.alternate_names = anames
                print("Adding:", oil.oil_id, oil.metadata.name,
                      oil.metadata.alternate_names)
                if not dry_run:
                    print("Saving out:", pth)
                    oil.to_file(pth)
                else:
                    print("Nothing saved")
def add_them(data):
    """
    Set the distillation ``fraction_recovered`` from the spreadsheet data.

    :param data: mapping of oil ID to a spreadsheet row; the name is read
                 from ``row[0]`` and the fraction recovered from
                 ``row[3]``

    :raises ValueError: if a fraction recovered entry is not empty,
                        ``"None"``, ``"<1"`` or a number

    Command line: the directory with the records, optionally with
    ``dry_run`` anywhere among the arguments.  Without a directory the
    usage message is printed and the script exits with status 1.

    The spreadsheet entry is applied to every sub-sample of the record:

    * ``"None"``: ``fraction_recovered`` is set to ``None``
    * ``"<1"``: a ``Concentration`` with ``max_value=1.0``
    * anything else: parsed as a float, a ``Concentration`` with that
      ``value``

    Records without a row are listed in ``missing_records.csv``; records
    with an empty entry are skipped.  All others are written back to
    their file unless this is a dry run.  A tally of each kind of value
    (counted per sub-sample) is printed at the end.
    """
    # "dry_run" is removed from argv so that the directory is always the
    # first remaining argument
    try:
        sys.argv.remove("dry_run")
        dry_run = True
    except ValueError:
        dry_run = False

    num_none = 0
    num_one = 0
    num_less_than_one = 0
    num_fraction = 0

    # the report is closed (and flushed) when the block is left, even if
    # a record raises part way through
    with open("missing_records.csv", 'w', encoding="utf-8") as missing:
        try:
            base_dir = sys.argv[1]
        except IndexError:
            print(USAGE)
            # a missing argument is a usage error: report failure
            sys.exit(1)

        for oil, pth in get_all_records(base_dir):
            ID = oil.oil_id
            try:
                row = data[ID]
            except KeyError:
                print(f"{ID} not in the spreadsheet")
                missing.write(",".join([ID, oil.metadata.name]))
                missing.write("\n")
                continue

            name = row[0]
            print("Processing:", ID, name)

            fraction_recovered = row[3].strip()
            if not fraction_recovered:
                print("no data for this one")
                continue
            print(f"{fraction_recovered=}")

            for ss in oil.sub_samples:
                dist_data = ss.distillation_data
                print(dist_data.fraction_recovered)

                if fraction_recovered == 'None':
                    dist_data.fraction_recovered = None
                    num_none += 1
                elif fraction_recovered == "<1":
                    dist_data.fraction_recovered = Concentration(
                        max_value=1.0, unit="fraction")
                    num_less_than_one += 1
                else:
                    # anything that is not a number is a data error: the
                    # ValueError from float() is left to propagate
                    val = float(fraction_recovered)
                    dist_data.fraction_recovered = Concentration(
                        value=val, unit="fraction")
                    if val == 1.0:
                        num_one += 1
                    else:
                        num_fraction += 1

                if dist_data.fraction_recovered is not None:
                    print("********************")
                    print("after adding, frac_recovered:")
                    print(dist_data.fraction_recovered)

            if not dry_run:
                print("Saving out:", pth)
                oil.to_file(pth)
            else:
                print("Nothing saved")

    print(f"{num_none=}")
    print(f"{num_one=}")
    print(f"{num_less_than_one=}")
    print(f"{num_fraction=}")
#!/usr/bin/env python
"""
This script looks at the distillation type, and sets the unit_type properly

Every record is checked with ``FixCutUnitType``.  Where the check
returns a flag of ``True``, the record is cleaned up and, unless this is
a dry run, written back to its file.
"""
from adios_db.models.oil.cleanup.distillation import FixCutUnitType
import adios_db.scripting as ads

json_data_dir, dry_run = ads.process_input()

# Read the data
for rec, pth in ads.get_all_records(json_data_dir):
    print("processing:", rec.oil_id)

    fixer = FixCutUnitType(rec)
    flag, msg = fixer.check()

    if flag is not True:
        # nothing is reported or changed for this record
        continue

    print(msg)
    print("Cleaning up!")
    fixer.cleanup()

    if dry_run:
        print("Nothing saved")
    else:
        print("Saving out:", pth)
        rec.to_file(pth)
#!/usr/bin/env python
"""
script to see which oils are suitable for gnome

The directory with the records is the first command line argument.  The
name of every record is printed; if ``make_gnome_oil()`` fails for a
record, the error is printed with the record's name, ID and product
type.
"""
import sys

import adios_db.scripting as dbs
from adios_db.computation.gnome_oil import make_gnome_oil

data_dir = sys.argv[1]

# NOTE(review): writing the results to "gnome_oil_info.csv" used to be
# stubbed out here in commented-out code; nothing is written to a file.

# Make gnome_oil from all of the records
for oil, path in dbs.get_all_records(data_dir):
    print(oil.metadata.name)

    # NOTE(review): `fresh` is not used below, but the lookup is kept, as
    # it raises IndexError for a record without sub-samples.
    fresh = oil.sub_samples[0]

    try:
        go = make_gnome_oil(oil)
    except Exception as err:
        # report the failure and carry on with the next record
        details = (err,
                   oil.metadata.name,
                   oil.oil_id,
                   oil.metadata.product_type)
        print("failed to make gnome oil ", *details)
""" # the scripting module has a few utilities and the # core Python objects you may need for scripting # calling it "adb" for easy typing import adios_db.scripting as adb base_dir, dry_run = adb.process_input() # create a new text file for output outfile = open("example_data.csv", 'w', encoding="utf-8") # write the header row: outfile.write('Name, ID, Dynamic Viscosity, unit, reference temp, unit\n') # Loop through all the JSON files in the given directory: for oil, path in adb.get_all_records(base_dir): print("\n**** ", oil.metadata.name) # select the desired product types: if oil.metadata.product_type in { "Crude Oil NOS", "Condensate", "Tight Oil", }: print(">> This is a Crude of some sort") fresh = oil.sub_samples[0] try: dvis = fresh.physical_properties.dynamic_viscosities[0]