def sfl_print_cmd(sfl_files):
    """
    Concatenates raw SFL files, prints a standardized SFL file.

    Makes the following changes to create a standardized SFL file:

    - Outputs only columns for database import.

    - The correct day of year folder will be added to FILE column values if
    not present

    - DATE column will be created if not present based on "FILE" column values
    (only applies to new-style datestamped file names)

    - STREAM PRESSURE values <= 0 will be changed to 1e-4

    - Any other required columns which are missing will be created with NA
    values.

    Input files will be concatenated in the order they're listed on the
    command-line. Outputs to STDOUT.
    """
    # sfl_files: iterable of SFL inputs, each passed as-is to sfl.read_file().
    # Maintainer notes live in comments rather than the docstring because the
    # docstring reads as user-facing help text.

    # Local import: this block cannot see whether the module already imports
    # pandas at file level.
    import pandas as pd

    # Read and standardize each input, keeping command-line order.
    frames = [sfl.fix(sfl.read_file(f)) for f in sfl_files]

    # DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0, and
    # it re-copied the accumulated frame on every iteration. A single concat
    # yields the same rows and index labels in one pass.
    # With no inputs, None is passed through exactly as the original loop did.
    df = pd.concat(frames) if frames else None

    sfl.save_to_file(df, sys.stdout)
def db_import_sfl_cmd(cruise, force, json, serial, verbose, sfl_file, db_file):
    """
    Imports SFL metadata to database.

    Writes processed SFL-FILE data to SQLite3 database file. Data will be
    checked before inserting. If any errors are found the first of each type
    will be reported and no data will be written. To read from STDIN use '-'
    for SFL-FILE. SFL-FILE should have the <cruise name> and
    <instrument serial> embedded in the filename as
    '<cruise name>_<instrument serial>.sfl'. If not, specify as options. If a
    database file does not exist a new one will be created. Errors or warnings
    are output to STDOUT.
    """
    # Resolution order for cruise and serial: a value passed by the caller
    # wins, then the SFL file name, then whatever the database reports.
    if sfl_file is not sys.stdin:
        parsed = sfl.parse_sfl_filename(sfl_file.name)
        if parsed:
            cruise = parsed[0] if cruise is None else cruise
            serial = parsed[1] if serial is None else serial

    # Database lookups are best-effort: a SeaFlowpyError just leaves the value
    # unset so the check below can report it.
    if cruise is None:
        try:
            cruise = db.get_cruise(db_file)
        except SeaFlowpyError:
            pass
    if serial is None:
        try:
            serial = db.get_serial(db_file)
        except SeaFlowpyError:
            pass

    # Both identifiers are mandatory from here on.
    if serial is None or cruise is None:
        raise click.ClickException(
            'instrument serial and cruise must both be specified either in filename as <cruise>_<instrument-serial>.sfl, as command-line options, or in database metadata table.'
        )

    frame = sfl.fix(sfl.read_file(sfl_file))
    problems = sfl.check(frame)

    if len(problems) > 0:
        # `json` selects the report format; `verbose` is forwarded as print_all.
        report = sfl.print_json_errors if json else sfl.print_tsv_errors
        report(problems, sys.stdout, print_all=verbose)

        # Entries whose level is "error" abort the import unless forced;
        # entries with any other level never block it.
        if not force:
            fatal = [p for p in problems if p["level"] == "error"]
            if len(fatal) > 0:
                sys.exit(1)

    sfl.save_to_db(frame, db_file, cruise, serial)
def sfl_dedup_cmd(sfl_file):
    """
    Removes duplicate 'FILE' lines.

    To read from STDIN use '-' for SFL_FILE. Removes lines with duplicate file
    entries and prints modified SFL to STDOUT. Because it's impossible to know
    which of the duplicated SFL entries corresponds to which EVT file, all
    duplicate rows are removed. Prints a unique list of files removed to
    STDERR. Duplicate files should also be removed from EVT data sets.
    """
    # Standardize first, then let sfl.dedup() split the data into the
    # duplicate entries and the remaining rows.
    standardized = sfl.fix(sfl.read_file(sfl_file))
    dup_files, deduped = sfl.dedup(standardized)

    if len(dup_files) > 0:
        # One tab-separated line per duplicate entry, written to STDERR so it
        # does not mix with the SFL data on STDOUT.
        report_lines = ['{}\t{}'.format(*entry) for entry in dup_files]
        click.echo(os.linesep.join(report_lines), err=True)

    sfl.save_to_file(deduped, sys.stdout)
def sfl_fix_event_rate_cmd(sfl_file, events_file):
    """
    Calculates true event rates.

    EVENTS-FILE should be a TSV file with EVT path/file ID in first column and
    event count in last column, or a popcycle SQLite3 database file with a
    '.db' extension. A version of SFL_FILE with updated event rates will be
    printed to STDOUT. In cases where the file duration value is < 0 or NA the
    event rate will be NA.
    """
    # sfl_file: SFL input, passed as-is to sfl.read_file().
    # events_file: a filesystem path (str / os.PathLike) or an open text file
    # object. The previous code called both str.endswith() and .readlines() on
    # the same object, so one of its two branches always raised AttributeError.
    df = sfl.read_file(sfl_file)
    df = sfl.fix(df)

    # Work out a path string for the extension check whichever form we got.
    is_path = isinstance(events_file, (str, os.PathLike))
    if is_path:
        events_path = os.fspath(events_file)
    else:
        events_path = str(getattr(events_file, "name", ""))

    # Event counts should be a dict of { file: event_count }
    if events_path.endswith(".db"):
        event_counts = db.get_event_counts(events_path)
    else:
        if is_path:
            with open(events_path, encoding="utf-8") as fh:
                raw_lines = fh.readlines()
        else:
            raw_lines = events_file.readlines()

        # Skip blank lines (e.g. a trailing newline at end of file), which
        # would otherwise fail in int('').
        rows = [
            line.rstrip().split('\t')
            for line in raw_lines
            if line.strip()
        ]
        event_counts = {
            seaflowfile.SeaFlowFile(row[0]).file_id: int(row[-1])
            for row in rows
        }

    df = sfl.fix_event_rate(df, event_counts)
    sfl.save_to_file(df, sys.stdout)