def main(cli_args=None):
    """Command-line entry point.

    Arguments:
        cli_args - list of command-line arguments to parse; when None,
            sys.argv[1:] is used instead
    """
    argv = sys.argv[1:] if cli_args is None else cli_args
    args = parse_args(argv)

    # Ensure tables and indexes before any insert is attempted
    db.ensure_tables(args.db)
    db.ensure_indexes(args.db)

    if not args.evt_dir:
        # User specified a single SFL file
        insert_files(
            [args.sfl], args.db, args.cruise, gga=args.gga, west=args.west)
        return

    # Try to insert all SFL files in EVT dir
    insert_files_recursive(
        args.db, args.evt_dir, args.cruise, gga=args.gga, west=args.west)
def main(cli_args=None):
    """Parse command-line arguments and run the requested insert.

    Arguments:
        cli_args - argument list to hand to parse_args(); defaults to
            sys.argv[1:] when None
    """
    if cli_args is None:
        cli_args = sys.argv[1:]
    parsed = parse_args(cli_args)

    dbpath = parsed.db
    db.ensure_tables(dbpath)
    db.ensure_indexes(dbpath)

    # Keyword options shared by both insert paths
    insert_opts = {"gga": parsed.gga, "west": parsed.west}

    if parsed.evt_dir:
        # Try to insert all SFL files in EVT dir
        insert_files_recursive(
            dbpath, parsed.evt_dir, parsed.cruise, **insert_opts)
    else:
        # User specified SFL file
        insert_files([parsed.sfl], dbpath, parsed.cruise, **insert_opts)
def filter_evt_files(files, cruise, filter_options, dbpath, opp_dir, s3=False,
                     process_count=1, every=10.0):
    """Filter a list of EVT files.

    Positional arguments:
        files - paths to EVT files to filter
        cruise - cruise name
        filter_options - Dictionary of filter params
            (notch1, notch2, width, offset, origin)
        dbpath - SQLite3 db path
        opp_dir - Directory for output binary OPP files

    Keyword arguments:
        s3 - Get EVT data from S3
        process_count - number of worker processes to use
        every - Percent progress output resolution

    do_work() is called once per file with a dictionary of options. Each
    result must provide the keys "evt_count", "evt_signal_count",
    "opp_count" and "ok". Progress and a final summary are printed to
    stdout; nothing is returned.

    When process_count > 1 a multiprocessing Pool is used. The pool is
    always shut down before this function returns or raises, so worker
    processes are never left behind.
    """
    # Single-argument print(...) calls are used throughout; they produce the
    # same output whether or not print_function is imported from __future__.
    o = {
        "file": None,  # fill in later
        "cruise": cruise,
        "process_count": process_count,
        "filter_options": filter_options,
        "every": every,
        "s3": s3,
        "cloud_config_items": None,
        "dbpath": dbpath,
        "opp_dir": opp_dir,
        "filter_id": None  # fill in later
    }

    if dbpath:
        dbdir = os.path.dirname(dbpath)
        if dbdir and not os.path.isdir(dbdir):
            util.mkdir_p(dbdir)
        db.ensure_tables(dbpath)
        o["filter_id"] = db.save_filter_params(dbpath, filter_options)

    if s3:
        config = conf.get_aws_config(s3_only=True)
        o["cloud_config_items"] = config.items("aws")

    # Construct worker inputs: one shallow copy of the options per file
    inputs = [dict(o, file=f) for f in files]

    evt_count = 0
    evt_signal_count = 0
    opp_count = 0
    files_ok = 0

    print("")
    print("Filtering %i EVT files. Progress every %i%% (approximately)" %
          (len(files), every))

    # Create a pool of N worker processes only when parallelism is requested
    pool = Pool(process_count) if process_count > 1 else None

    t0 = time.time()

    last = 0  # Last progress milestone in increments of every
    evt_count_block = 0  # EVT particles in this block (between milestones)
    evt_signal_count_block = 0  # EVT noise filtered particles in this block
    opp_count_block = 0  # OPP particles in this block

    finished = False
    try:
        if pool is not None:
            # Filter particles in parallel with process pool
            results = pool.imap_unordered(do_work, inputs)
        else:
            results = imap(do_work, inputs)

        for i, res in enumerate(results):
            evt_count_block += res["evt_count"]
            evt_signal_count_block += res["evt_signal_count"]
            opp_count_block += res["opp_count"]
            files_ok += 1 if res["ok"] else 0

            # Print progress periodically
            perc = float(i + 1) / len(files) * 100  # Percent completed
            # Round down to closest every%
            milestone = int(perc / every) * every
            if milestone > last:
                now = time.time()
                evt_count += evt_count_block
                evt_signal_count += evt_signal_count_block
                opp_count += opp_count_block
                ratio_signal_block = zerodiv(
                    opp_count_block, evt_signal_count_block)
                ratio_block = zerodiv(opp_count_block, evt_count_block)
                msg = "File: %i/%i (%.02f%%)" % (i + 1, len(files), perc)
                msg += " Particles this block: %i / %i (%i) %.04f (%.04f) elapsed: %.2fs" % \
                    (opp_count_block, evt_signal_count_block,
                     evt_count_block, ratio_signal_block, ratio_block,
                     now - t0)
                print(msg)
                sys.stdout.flush()
                last = milestone
                evt_count_block = 0
                evt_signal_count_block = 0
                opp_count_block = 0
        finished = True
    finally:
        if pool is not None:
            if finished:
                # All results were consumed, so workers are idle
                pool.close()
            else:
                # Something failed mid-run; don't wait for queued tasks
                pool.terminate()
            pool.join()

    # If any particle count data is left, add it to totals
    if evt_count_block > 0:
        evt_count += evt_count_block
        evt_signal_count += evt_signal_count_block
        opp_count += opp_count_block

    opp_evt_signal_ratio = zerodiv(opp_count, evt_signal_count)
    opp_evt_ratio = zerodiv(opp_count, evt_count)

    t1 = time.time()
    delta = t1 - t0
    evtrate = zerodiv(evt_count, delta)
    evtsignalrate = zerodiv(evt_signal_count, delta)
    opprate = zerodiv(opp_count, delta)

    print("")
    print("Input EVT files = %i" % len(files))
    print("Parsed EVT files = %i" % files_ok)
    print("EVT particles = %s (%.2f p/s)" % (evt_count, evtrate))
    print("EVT noise filtered particles = %s (%.2f p/s)" %
          (evt_signal_count, evtsignalrate))
    print("OPP particles = %s (%.2f p/s)" % (opp_count, opprate))
    print("OPP/EVT ratio = %.04f (%.04f)" %
          (opp_evt_signal_ratio, opp_evt_ratio))
    print("Filtering completed in %.2f seconds" % (delta,))
def filter_evt_files(files, cruise, filter_options, dbpath, opp_dir, s3=False,
                     process_count=1, every=10.0):
    """Filter a list of EVT files.

    Positional arguments:
        files - paths to EVT files to filter
        cruise - cruise name
        filter_options - Dictionary of filter params
            (notch1, notch2, width, offset, origin)
        dbpath - SQLite3 db path
        opp_dir - Directory for output binary OPP files

    Keyword arguments:
        s3 - Get EVT data from S3
        process_count - number of worker processes to use
        every - Percent progress output resolution

    do_work() is called once per file with a dictionary of options. Each
    result must provide the keys "evt_count", "evt_signal_count",
    "opp_count" and "ok". Progress and a final summary are printed to
    stdout; nothing is returned.

    When process_count > 1 a multiprocessing Pool is used. The pool is
    always shut down before this function returns or raises, so worker
    processes are never left behind.
    """
    # Single-argument print(...) calls are used throughout; they produce the
    # same output whether or not print_function is imported from __future__.
    o = {
        "file": None,  # fill in later
        "cruise": cruise,
        "process_count": process_count,
        "filter_options": filter_options,
        "every": every,
        "s3": s3,
        "cloud_config_items": None,
        "dbpath": dbpath,
        "opp_dir": opp_dir,
        "filter_id": None  # fill in later
    }

    if dbpath:
        dbdir = os.path.dirname(dbpath)
        if dbdir and not os.path.isdir(dbdir):
            util.mkdir_p(dbdir)
        db.ensure_tables(dbpath)
        o["filter_id"] = db.save_filter_params(dbpath, filter_options)

    if s3:
        config = conf.get_aws_config(s3_only=True)
        o["cloud_config_items"] = config.items("aws")

    # Construct worker inputs: one shallow copy of the options per file
    inputs = [dict(o, file=f) for f in files]

    evt_count = 0
    evt_signal_count = 0
    opp_count = 0
    files_ok = 0

    print("")
    print("Filtering %i EVT files. Progress every %i%% (approximately)" %
          (len(files), every))

    # Create a pool of N worker processes only when parallelism is requested
    pool = Pool(process_count) if process_count > 1 else None

    t0 = time.time()

    last = 0  # Last progress milestone in increments of every
    evt_count_block = 0  # EVT particles in this block (between milestones)
    evt_signal_count_block = 0  # EVT noise filtered particles in this block
    opp_count_block = 0  # OPP particles in this block

    finished = False
    try:
        if pool is not None:
            # Filter particles in parallel with process pool
            results = pool.imap_unordered(do_work, inputs)
        else:
            results = imap(do_work, inputs)

        for i, res in enumerate(results):
            evt_count_block += res["evt_count"]
            evt_signal_count_block += res["evt_signal_count"]
            opp_count_block += res["opp_count"]
            files_ok += 1 if res["ok"] else 0

            # Print progress periodically
            perc = float(i + 1) / len(files) * 100  # Percent completed
            # Round down to closest every%
            milestone = int(perc / every) * every
            if milestone > last:
                now = time.time()
                evt_count += evt_count_block
                evt_signal_count += evt_signal_count_block
                opp_count += opp_count_block
                ratio_signal_block = zerodiv(
                    opp_count_block, evt_signal_count_block)
                ratio_block = zerodiv(opp_count_block, evt_count_block)
                msg = "File: %i/%i (%.02f%%)" % (i + 1, len(files), perc)
                msg += " Particles this block: %i / %i (%i) %.04f (%.04f) elapsed: %.2fs" % \
                    (opp_count_block, evt_signal_count_block,
                     evt_count_block, ratio_signal_block, ratio_block,
                     now - t0)
                print(msg)
                sys.stdout.flush()
                last = milestone
                evt_count_block = 0
                evt_signal_count_block = 0
                opp_count_block = 0
        finished = True
    finally:
        if pool is not None:
            if finished:
                # All results were consumed, so workers are idle
                pool.close()
            else:
                # Something failed mid-run; don't wait for queued tasks
                pool.terminate()
            pool.join()

    # If any particle count data is left, add it to totals
    if evt_count_block > 0:
        evt_count += evt_count_block
        evt_signal_count += evt_signal_count_block
        opp_count += opp_count_block

    opp_evt_signal_ratio = zerodiv(opp_count, evt_signal_count)
    opp_evt_ratio = zerodiv(opp_count, evt_count)

    t1 = time.time()
    delta = t1 - t0
    evtrate = zerodiv(evt_count, delta)
    evtsignalrate = zerodiv(evt_signal_count, delta)
    opprate = zerodiv(opp_count, delta)

    print("")
    print("Input EVT files = %i" % len(files))
    print("Parsed EVT files = %i" % files_ok)
    print("EVT particles = %s (%.2f p/s)" % (evt_count, evtrate))
    print("EVT noise filtered particles = %s (%.2f p/s)" %
          (evt_signal_count, evtsignalrate))
    print("OPP particles = %s (%.2f p/s)" % (opp_count, opprate))
    print("OPP/EVT ratio = %.04f (%.04f)" %
          (opp_evt_signal_ratio, opp_evt_ratio))
    print("Filtering completed in %.2f seconds" % (delta,))