Example #1
0
def main(cli_args=None):
    """Run the command-line interface.

    Arguments:
        cli_args - list of command-line argument strings; when None the
            arguments are taken from sys.argv[1:]

    After parsing, db.ensure_tables and db.ensure_indexes are called on
    the parsed db option. Then either the EVT directory is searched for
    SFL files to insert (when evt_dir is set) or the single SFL file named
    by the sfl option is inserted.
    """
    argv = sys.argv[1:] if cli_args is None else cli_args
    args = parse_args(argv)

    db.ensure_tables(args.db)
    db.ensure_indexes(args.db)

    if not args.evt_dir:
        # No EVT dir given, so insert the one SFL file the user named
        insert_files(
            [args.sfl], args.db, args.cruise, gga=args.gga, west=args.west)
        return

    # Try to insert every SFL file in the EVT dir
    insert_files_recursive(
        args.db, args.evt_dir, args.cruise, gga=args.gga, west=args.west)
Example #2
0
def main(cli_args=None):
    """Entry point for the command-line tool.

    Arguments:
        cli_args - argument strings to parse; defaults to sys.argv[1:]
            when None is passed

    The database given by the parsed db option first has
    db.ensure_tables and db.ensure_indexes applied to it. SFL data is then
    inserted, either from all SFL files in evt_dir or, if evt_dir is not
    set, from the single file given by the sfl option.
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    args = parse_args(cli_args)

    database = args.db
    db.ensure_tables(database)
    db.ensure_indexes(database)

    if args.evt_dir:
        # Try to insert all SFL files in EVT dir
        insert_files_recursive(
            database, args.evt_dir, args.cruise,
            **{"gga": args.gga, "west": args.west})
    else:
        # User specified SFL file
        sfl_files = [args.sfl]
        insert_files(
            sfl_files, database, args.cruise,
            **{"gga": args.gga, "west": args.west})
Example #3
0
def filter_evt_files(files,
                     cruise,
                     filter_options,
                     dbpath,
                     opp_dir,
                     s3=False,
                     process_count=1,
                     every=10.0):
    """Filter a list of EVT files, printing progress and a final summary.

    Every file is passed to do_work together with a copy of the shared
    options dictionary; the per-file counts do_work returns ("evt_count",
    "evt_signal_count", "opp_count", "ok") are aggregated here.

    Positional arguments:
        files - paths to EVT files to filter (must support len())
        cruise - cruise name
        filter_options - Dictionary of filter params
            (notch1, notch2, width, offset, origin)
        dbpath - SQLite3 db path; when empty or None no database setup is
            done here and the "filter_id" handed to workers stays None
        opp_dir - Directory for output binary OPP files

    Keyword arguments:
        s3 - Get EVT data from S3
        process_count - number of worker processes to use; a process pool
            is only created when this is greater than 1
        every - Percent progress output resolution; must be non-zero since
            it is used as a divisor when computing progress milestones

    Returns nothing; all results are reported on stdout.
    """
    # Options shared by every worker task
    base_task = {
        "file": None,  # filled in per task below
        "cruise": cruise,
        "process_count": process_count,
        "filter_options": filter_options,
        "every": every,
        "s3": s3,
        "cloud_config_items": None,
        "dbpath": dbpath,
        "opp_dir": opp_dir,
        "filter_id": None  # filled in below when a db is in use
    }

    if dbpath:
        dbdir = os.path.dirname(dbpath)
        if dbdir and not os.path.isdir(dbdir):
            util.mkdir_p(dbdir)
        db.ensure_tables(dbpath)
        base_task["filter_id"] = db.save_filter_params(dbpath, filter_options)

    if s3:
        config = conf.get_aws_config(s3_only=True)
        base_task["cloud_config_items"] = config.items("aws")

    # Construct worker inputs: one shallow copy of the options per file
    inputs = [dict(base_task, file=f) for f in files]
    file_count = len(files)

    # Only pay for a pool of N worker processes when asked for parallelism
    pool = Pool(process_count) if process_count > 1 else None

    evt_count = 0
    evt_signal_count = 0
    opp_count = 0
    files_ok = 0

    finished = False
    try:
        # Single-argument print(...) behaves the same as a print statement
        print("")
        print("Filtering %i EVT files. Progress every %i%% (approximately)" %
              (file_count, every))

        t0 = time.time()

        last = 0  # Last progress milestone in increments of every
        evt_count_block = 0  # EVT particles in this block (between milestones)
        evt_signal_count_block = 0  # EVT noise filtered particles in this block
        opp_count_block = 0  # OPP particles in this block

        if pool is not None:
            # Filter particles in parallel with process pool
            results = pool.imap_unordered(do_work, inputs)
        else:
            results = imap(do_work, inputs)

        for i, res in enumerate(results):
            # Grand totals are updated for every result so they never depend
            # on where the progress milestones happen to fall.
            evt_count += res["evt_count"]
            evt_signal_count += res["evt_signal_count"]
            opp_count += res["opp_count"]
            if res["ok"]:
                files_ok += 1

            evt_count_block += res["evt_count"]
            evt_signal_count_block += res["evt_signal_count"]
            opp_count_block += res["opp_count"]

            # Print progress periodically
            perc = float(i + 1) / file_count * 100  # Percent completed
            # Round down to closest every%
            milestone = int(perc / every) * every
            if milestone > last:
                now = time.time()
                ratio_signal_block = zerodiv(opp_count_block,
                                             evt_signal_count_block)
                ratio_block = zerodiv(opp_count_block, evt_count_block)
                msg = "File: %i/%i (%.02f%%)" % (i + 1, file_count, perc)
                msg += " Particles this block: %i / %i (%i) %.04f (%.04f) elapsed: %.2fs" % \
                    (opp_count_block, evt_signal_count_block, evt_count_block,
                     ratio_signal_block, ratio_block, now - t0)
                print(msg)
                sys.stdout.flush()
                last = milestone
                evt_count_block = 0
                evt_signal_count_block = 0
                opp_count_block = 0

        # Measure elapsed time before pool shutdown so it reflects filtering
        delta = time.time() - t0
        finished = True
    finally:
        if pool is not None:
            # Always release the worker processes: a clean close after
            # success, a hard stop if the loop was interrupted by an error.
            if finished:
                pool.close()
            else:
                pool.terminate()
            pool.join()

    opp_evt_signal_ratio = zerodiv(opp_count, evt_signal_count)
    opp_evt_ratio = zerodiv(opp_count, evt_count)

    evtrate = zerodiv(evt_count, delta)
    evtsignalrate = zerodiv(evt_signal_count, delta)
    opprate = zerodiv(opp_count, delta)

    print("")
    print("Input EVT files = %i" % file_count)
    print("Parsed EVT files = %i" % files_ok)
    print("EVT particles = %s (%.2f p/s)" % (evt_count, evtrate))
    print("EVT noise filtered particles = %s (%.2f p/s)" % (evt_signal_count,
                                                            evtsignalrate))
    print("OPP particles = %s (%.2f p/s)" % (opp_count, opprate))
    print("OPP/EVT ratio = %.04f (%.04f)" % (opp_evt_signal_ratio,
                                             opp_evt_ratio))
    print("Filtering completed in %.2f seconds" % (delta, ))
Example #4
0
def filter_evt_files(files, cruise, filter_options, dbpath, opp_dir, s3=False,
                     process_count=1, every=10.0):
    """Filter a list of EVT files and report progress and totals on stdout.

    Each file becomes one task for do_work, which receives a copy of the
    shared options dictionary and returns per-file counts ("evt_count",
    "evt_signal_count", "opp_count") plus an "ok" flag; those results are
    summed here.

    Positional arguments:
        files - paths to EVT files to filter (must support len())
        cruise - cruise name
        filter_options - Dictionary of filter params
            (notch1, notch2, width, offset, origin)
        dbpath - SQLite3 db path; if empty or None the database steps are
            skipped and workers receive a "filter_id" of None
        opp_dir - Directory for output binary OPP files

    Keyword arguments:
        s3 - Get EVT data from S3
        process_count - number of worker processes to use; values of 1 or
            less run the work in this process without a pool
        every - Percent progress output resolution; must be non-zero since
            progress milestones are computed by dividing by it

    Returns nothing.
    """
    # Template of options handed to every worker
    shared = {
        "file": None,  # set per task below
        "cruise": cruise,
        "process_count": process_count,
        "filter_options": filter_options,
        "every": every,
        "s3": s3,
        "cloud_config_items": None,
        "dbpath": dbpath,
        "opp_dir": opp_dir,
        "filter_id": None  # set below when a db is in use
    }

    if dbpath:
        dbdir = os.path.dirname(dbpath)
        if dbdir and not os.path.isdir(dbdir):
            util.mkdir_p(dbdir)
        db.ensure_tables(dbpath)
        shared["filter_id"] = db.save_filter_params(dbpath, filter_options)

    if s3:
        config = conf.get_aws_config(s3_only=True)
        shared["cloud_config_items"] = config.items("aws")

    # Construct worker inputs (shallow copy of the template per file)
    inputs = [dict(shared, file=path) for path in files]
    total_files = len(files)

    # Create a pool of N worker processes only for parallel runs
    pool = None
    if process_count > 1:
        pool = Pool(process_count)

    evt_count = 0
    evt_signal_count = 0
    opp_count = 0
    files_ok = 0

    succeeded = False
    try:
        # print(...) with one argument prints exactly like a print statement
        print("")
        print("Filtering %i EVT files. Progress every %i%% (approximately)" %
              (total_files, every))

        t0 = time.time()

        last = 0  # Last progress milestone in increments of every
        evt_count_block = 0  # EVT particles in this block (between milestones)
        evt_signal_count_block = 0  # EVT noise filtered particles in this block
        opp_count_block = 0  # OPP particles in this block

        if pool is None:
            results = imap(do_work, inputs)
        else:
            # Filter particles in parallel with process pool
            results = pool.imap_unordered(do_work, inputs)

        for i, res in enumerate(results):
            # Totals are kept up to date on every result rather than only at
            # progress milestones, so nothing can be left uncounted.
            evt_count += res["evt_count"]
            evt_signal_count += res["evt_signal_count"]
            opp_count += res["opp_count"]
            if res["ok"]:
                files_ok += 1

            evt_count_block += res["evt_count"]
            evt_signal_count_block += res["evt_signal_count"]
            opp_count_block += res["opp_count"]

            # Print progress periodically
            perc = float(i + 1) / total_files * 100  # Percent completed
            # Round down to closest every%
            milestone = int(perc / every) * every
            if milestone <= last:
                continue

            now = time.time()
            ratio_signal_block = zerodiv(opp_count_block, evt_signal_count_block)
            ratio_block = zerodiv(opp_count_block, evt_count_block)
            msg = "File: %i/%i (%.02f%%)" % (i + 1, total_files, perc)
            msg += " Particles this block: %i / %i (%i) %.04f (%.04f) elapsed: %.2fs" % \
                (opp_count_block, evt_signal_count_block, evt_count_block,
                 ratio_signal_block, ratio_block, now - t0)
            print(msg)
            sys.stdout.flush()
            last = milestone
            evt_count_block = 0
            evt_signal_count_block = 0
            opp_count_block = 0

        # Take the end time before pool shutdown so it covers filtering only
        delta = time.time() - t0
        succeeded = True
    finally:
        if pool is not None:
            # Worker processes must not outlive this call: close normally
            # after success, terminate if an error cut the loop short.
            if succeeded:
                pool.close()
            else:
                pool.terminate()
            pool.join()

    opp_evt_signal_ratio = zerodiv(opp_count, evt_signal_count)
    opp_evt_ratio = zerodiv(opp_count, evt_count)

    evtrate = zerodiv(evt_count, delta)
    evtsignalrate = zerodiv(evt_signal_count, delta)
    opprate = zerodiv(opp_count, delta)

    print("")
    print("Input EVT files = %i" % total_files)
    print("Parsed EVT files = %i" % files_ok)
    print("EVT particles = %s (%.2f p/s)" % (evt_count, evtrate))
    print("EVT noise filtered particles = %s (%.2f p/s)" % (evt_signal_count, evtsignalrate))
    print("OPP particles = %s (%.2f p/s)" % (opp_count, opprate))
    print("OPP/EVT ratio = %.04f (%.04f)" % (opp_evt_signal_ratio, opp_evt_ratio))
    print("Filtering completed in %.2f seconds" % (delta,))