Esempio n. 1
0
def sfl_print_cmd(sfl_files):
    """
    Concatenates raw SFL files, prints a standardized SFL file.

    Makes the following changes to create a standardized SFL file:

    - Outputs only columns for database import.

    - The correct day of year folder will be added to FILE column values if not
    present

    - DATE column will be created if not present based on "FILE" column values
    (only applies to new-style datestamped file names)

    - STREAM PRESSURE values <= 0 will be changed to 1e-4

    - Any other required columns which are missing will be created with NA
    values.

    Input files will be concatenated in the order they're listed on the
    command-line. Outputs to STDOUT.
    """
    # Read and standardize every input, keeping command-line order. Each file
    # is read and fixed before the next one is opened, as before.
    frames = [sfl.fix(sfl.read_file(f)) for f in sfl_files]
    if not frames:
        # No input files: there is nothing to concatenate or print.
        return

    # DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat() is the supported replacement and builds the result in one
    # pass instead of re-copying the accumulated frame for every input file.
    # Assumes sfl.read_file()/sfl.fix() yield pandas DataFrames, which the
    # previous df.append() call already required.
    # Imported locally so this function does not rely on a module-level alias.
    import pandas as pd

    sfl.save_to_file(pd.concat(frames), sys.stdout)
Esempio n. 2
0
def sfl_convert_gga_cmd(sfl_file):
    """
    Converts GGA coords to decimal degrees.

    To read from STDIN use '-' for SFL_FILE. Prints modified SFL file to STDOUT.
    """
    # Numeric conversion is disabled on read so coordinate values reach the
    # converter exactly as they were written in the file.
    df = sfl.read_file(sfl_file, convert_numerics=False)
    try:
        df = sfl.convert_gga2dd(df)
    except ValueError as e:
        # Surface conversion problems as a CLI error message; chain explicitly
        # so the original ValueError stays attached as the cause.
        raise click.ClickException(str(e)) from e
    sfl.save_to_file(df, sys.stdout)
Esempio n. 3
0
def db_import_sfl_cmd(cruise, force, json, serial, verbose, sfl_file, db_file):
    """
    Imports SFL metadata to database.

    Writes processed SFL-FILE data to SQLite3 database file. Data will be
    checked before inserting. If any errors are found the first of each type
    will be reported and no data will be written. To read from STDIN use '-'
    for SFL-FILE. SFL-FILE should have the <cruise name> and <instrument serial>
    embedded in the filename as '<cruise name>_<instrument serial>.sfl'. If not,
    specify as options. If a database file does not exist a new one will be
    created. Errors or warnings are output to STDOUT.
    """
    # Source 1: the file name. Explicit option values always win; STDIN has no
    # meaningful name so it is skipped.
    if sfl_file is not sys.stdin:
        parsed = sfl.parse_sfl_filename(sfl_file.name)
        if parsed:
            cruise = parsed[0] if cruise is None else cruise
            serial = parsed[1] if serial is None else serial

    # Source 2: values already stored in the database. A lookup failure is
    # deliberately ignored here; the check below reports anything still unset.
    if cruise is None:
        try:
            cruise = db.get_cruise(db_file)
        except SeaFlowpyError:
            pass
    if serial is None:
        try:
            serial = db.get_serial(db_file)
        except SeaFlowpyError:
            pass

    # Both identifiers are mandatory for the import.
    if not (cruise is not None and serial is not None):
        raise click.ClickException(
            'instrument serial and cruise must both be specified either in filename as <cruise>_<instrument-serial>.sfl, as command-line options, or in database metadata table.'
        )

    cleaned = sfl.fix(sfl.read_file(sfl_file))
    problems = sfl.check(cleaned)

    if len(problems) > 0:
        # Pick the report format once, then emit to STDOUT.
        report = sfl.print_json_errors if json else sfl.print_tsv_errors
        report(problems, sys.stdout, print_all=verbose)
        if not force:
            # Only entries at "error" level block the import; anything else is
            # reported but does not stop the write.
            blocking = [p for p in problems if p["level"] == "error"]
            if len(blocking) > 0:
                sys.exit(1)
    sfl.save_to_db(cleaned, db_file, cruise, serial)
Esempio n. 4
0
def sfl_detect_gga_cmd(sfl_file):
    """
    Detects GGA coordinates in SFL_FILE.

    To read from STDIN use '-' for SFL_FILE. Prints 'True' to STDOUT if any GGA
    coordinates are found, else 'False'.
    """
    # Read without numeric conversion so coordinate values are inspected in the
    # form they have in the file.
    raw = sfl.read_file(sfl_file, convert_numerics=False)
    # Report the detection result as a literal 'True' / 'False' line.
    click.echo('True' if sfl.has_gga(raw) else 'False')
Esempio n. 5
0
def manifest_cmd(verbose, sfl_file, evt_dir):
    """
    Compares files in SFL-FILE with files in EVT-DIR.

    If EVT-DIR begins with 's3://s3-bucket-name' then files will be located in
    S3. To configure credentials for S3 access use the 'aws' command-line tool
    from the 'awscli' Python package.

    It's normal for about one file per day to be missing from the SFL file or
    EVT day of year folder, especially around midnight.

    To read from STDIN use '-' for SFL_FILE. Prints a file list diff to STDOUT.
    """
    if evt_dir.startswith("s3://"):
        try:
            # "s3://bucket/folder" splits into ["s3:", "", "bucket", "folder"].
            # From here on evt_dir is the folder inside the bucket, which is
            # also what the summary line below prints.
            _, _, bucket, evt_dir = evt_dir.split("/", 3)
        except ValueError:
            # The unpacking error itself carries no useful detail for the user.
            raise click.ClickException(
                "could not parse bucket and folder from S3 EVT-DIR"
            ) from None
        cloud = clouds.AWS([("s3-bucket", bucket)])
        try:
            files = cloud.get_files(evt_dir)
        except botocore.exceptions.NoCredentialsError:
            print('Please configure aws first:', file=sys.stderr)
            print('  $ pip install awscli', file=sys.stderr)
            print('  then', file=sys.stderr)
            print('  $ aws configure', file=sys.stderr)
            raise click.Abort()
        found_evt_files = seaflowfile.sorted_files(seaflowfile.keep_evt_files(files))
    else:
        found_evt_files = seaflowfile.find_evt_files(evt_dir)

    df = sfl.read_file(sfl_file)
    # SFL entries are identified by file_id, discovered files by path_file_id.
    sfl_set = {seaflowfile.SeaFlowFile(f).file_id for f in df['file']}
    found_set = {seaflowfile.SeaFlowFile(f).path_file_id for f in found_evt_files}
    # Computed once; it feeds the summary line.
    common = sfl_set & found_set

    print('%d EVT files in SFL file %s' % (len(sfl_set), sfl_file.name))
    print('%d EVT files in directory %s' % (len(found_set), evt_dir))
    print('%d EVT files in common' % len(common))
    # The detailed diff is only worth printing when the two sets differ.
    if verbose and sfl_set != found_set:
        print('')
        print('EVT files in SFL but not found:')
        print(os.linesep.join(sorted(sfl_set - found_set)))
        print('')
        print('EVT files found but not in SFL:')
        print(os.linesep.join(sorted(found_set - sfl_set)))
        print('')
Esempio n. 6
0
def sfl_dedup_cmd(sfl_file):
    """
    Removes duplicate 'FILE' lines.

    To read from STDIN use '-' for SFL_FILE. Removes lines with duplicate file
    entries and prints modified SFL to STDOUT. Because it's impossible to know
    which of the duplicated SFL entries corresponds to which EVT file, all
    duplicate rows are removed. Prints a unique list of files removed to STDERR.
    Duplicate files should also be removed from EVT data sets.
    """
    # Standardize first, then split into removed duplicates and remaining rows.
    standardized = sfl.fix(sfl.read_file(sfl_file))
    removed, remaining = sfl.dedup(standardized)
    if len(removed) > 0:
        # One tab-separated line per removed entry, sent to STDERR so STDOUT
        # carries only the SFL output.
        report_lines = ['{}\t{}'.format(*entry) for entry in removed]
        click.echo(os.linesep.join(report_lines), err=True)
    sfl.save_to_file(remaining, sys.stdout)
Esempio n. 7
0
def sfl_validate_cmd(json, verbose, sfl_file):
    """
    Validates SFL files.

    Checks that:

    - Required columns are present: FILE, DATE, FILE DURATION, LAT, LON,
    CONDUCTIVITY, SALINITY, OCEAN TEMP, PAR, BULK RED, STREAM PRESSURE,
    EVENT RATE

    - No missing values in following columns: FILE, DATE, FILE DURATION, LAT,
    LON, STREAM PRESSURE, EVENT RATE

    - FILE column values have day of year folders, are in the proper format, in
    chronological order, are unique, and matches DATE column

    - DATE column values are in the proper format, represent valid dates and
    times, and are UTC

    - FILE DURATION is a positive number

    - LAT and LON column values are valid decimal degree values in the correct
    ranges

    - CONDUCTIVITY, SALINITY, OCEAN TEMP, PAR, BULK_RED column values are
    numbers

    - STREAM PRESSURE is a positive number >= 1e-4

    - EVENT RATE is a positive number

    Because some of these errors can affect every row of the file (e.g. out of
    order files), only the first error of each type is printed. To get a full
    printout of all errors use --verbose.

    Prints to STDOUT.
    """
    # sfl_file is iterated as a collection of paths; each gets its own section:
    # base name, any findings, then a blank separator line.
    for path in sfl_file:
        print(os.path.basename(path))
        # The file is checked exactly as read; sfl.fix() is not applied here.
        findings = sfl.check(sfl.read_file(path))
        if len(findings) > 0:
            report = sfl.print_json_errors if json else sfl.print_tsv_errors
            report(findings, sys.stdout, print_all=verbose)
        print("")  # blank line spacer
Esempio n. 8
0
def sfl_fix_event_rate_cmd(sfl_file, events_file):
    """
    Calculates true event rates.

    EVENTS-FILE should be a TSV file with EVT path/file ID in first
    column and event count in last column, or a popcycle SQLite3 database file
    with a '.db' extension. A version of SFL_FILE with updated event rates will
    be printed to STDOUT. In cases where the file duration value is < 0 or NA
    the event rate will be NA.
    """
    df = sfl.read_file(sfl_file)
    df = sfl.fix(df)

    # Event counts should be a dict of { file: event_count }.
    #
    # events_file used to be treated as a string (.endswith) and as an open
    # file (.readlines()) in the same branch chain, so the TSV path always
    # failed with AttributeError. Accept either form explicitly.
    if hasattr(events_file, "readlines"):
        # Already-open text handle.
        tsv_lines = events_file.readlines()
    elif str(events_file).endswith(".db"):
        # Database input: counts come from the database, not from TSV lines.
        tsv_lines = None
    else:
        # Path to a TSV file.
        with open(events_file, encoding="utf-8") as handle:
            tsv_lines = handle.readlines()

    if tsv_lines is None:
        event_counts = db.get_event_counts(events_file)
    else:
        # Skip blank lines (e.g. a trailing newline at end of file), which
        # would otherwise fail the int() conversion.
        rows = [x.rstrip().split('\t') for x in tsv_lines if x.strip()]
        event_counts = {seaflowfile.SeaFlowFile(x[0]).file_id: int(x[-1]) for x in rows}

    df = sfl.fix_event_rate(df, event_counts)
    sfl.save_to_file(df, sys.stdout)