def main(BouncerSubclass):
    '''Within a module that defines a subclass of BouncerProcessManager, say
    FooSubclass, you can do this:
        if __name__ == "__main__":
            main(FooSubclass)
    which parses command line arguments, instantiates your subclass, and runs
    its server.'''
    _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
    logname = os.path.basename(filename)
    cwd = os.getcwd()
    default_config = os.path.join(cwd, "bouncer_config.json")
    parser = argparse.ArgumentParser(description='Bouncer process manager for %s' % logname)
    parser.add_argument("-c", "--config", type=str, default=default_config,
        help="Default=%(default)s. The config file. See bouncer/bouncer_common.py for config-file format.")
    parser.add_argument("-a", "--addr", type=str, default="127.0.0.1",
        help="Default=%(default)s. Address the bouncer listens on")
    parser.add_argument("-p", "--port", type=int, default=3001,
        help="Default=%(default)d. Port the bouncer listens on")
    log.add_arguments(parser)
    args = parser.parse_args()
    logger = log.getLogger(args, name=logname)
    logger.info("Command line arguments: %s" % str(args))

    try:
        with open(args.config, "r") as f:
            pass
    except IOError:
        logger.critical("Error: could not open config file (%s)" % args.config)
        sys.exit(1)

    if not issubclass(BouncerSubclass, BouncerProcessManager):
        raise ValueError("The given class, %s, is not a subclass of BouncerProcessManager" % BouncerSubclass)

    config_filename = args.config
    addr = args.addr
    port = args.port
    try:
        with open(config_filename) as f:
            config = Config(f)
        bpm = BouncerSubclass(config, addr, port, logger)
    except Exception:
        logger.critical("Error while parsing config file. See bouncer/bouncer_common.py for the config format.")
        raise
    bpm.run()
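# Usage sketch for main(): a module defining a BouncerProcessManager subclass
# can hand it to main() to get argument parsing, config loading, and the server
# loop for free. FooBouncer and the importing module name are hypothetical;
# override whatever process-management hooks BouncerProcessManager actually
# requires.
#
#     from bouncer_process_manager import BouncerProcessManager, main  # hypothetical module name
#
#     class FooBouncer(BouncerProcessManager):
#         pass  # override process-management hooks here
#
#     if __name__ == "__main__":
#         main(FooBouncer)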
def create_parser(model, parser):
    parser = model.add_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = train.add_training_arguments(parser)
    parser = train.add_ipu_arguments(parser)
    parser = logging.add_arguments(parser)
    return parser
def create_parser(model, lr_schedule, parser):
    parser = model.add_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = add_training_arguments(parser)
    parser = lr_schedule.add_arguments(parser)
    parser = add_ipu_arguments(parser)
    parser = logging.add_arguments(parser)
    return parser
def create_all_options_parser():
    parser = argparse.ArgumentParser(
        description='BERT Pretraining in TensorFlow',
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = add_main_arguments(parser)
    parser = dataset.add_arguments(parser)
    parser = add_training_arguments(parser)
    parser = add_ipu_arguments(parser)
    parser = bert_logging.add_arguments(parser)
    parser = add_bert_arguments(parser)
    return parser
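# Usage sketch: the composed parser behaves like any argparse parser. Which
# flags exist depends on the add_*_arguments helpers above; --seq-length here
# is an assumption for illustration only.
#
#     parser = create_all_options_parser()
#     args = parser.parse_args(["--seq-length", "128"])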
def add_common_arguments(parser, required=True):
    group = parser.add_argument_group('Common options')

    # Training options
    # fmt:off
    group.add_argument('--micro-batch-size', type=int,
                       help="Set micro-batch-size for training graph")
    group.add_argument('--global-batch-size', type=int, default=None,
                       help="The total batch size at which we want the model to run")
    group.add_argument('--base-learning-rate', type=float, default=2e-5,
                       help="Base learning rate exponent (2**N). blr = lr / bs")
    group.add_argument('--num-train-steps', type=int,
                       help="Number of training steps.")
    group.add_argument('--loss-scaling', type=float, default=1,
                       help="Loss scaling factor.")
    group.add_argument('--loss-scaling-by-step', type=str, default=None,
                       help="Specify changing loss scaling factors at given training steps, as a dictionary.")
    group.add_argument('--steps-per-ckpts', type=int, default=256,
                       help="Steps between checkpoints")
    group.add_argument('--optimizer', type=str, default="momentum",
                       choices=['sgd', 'momentum', 'adamw', 'lamb'],
                       help="Optimizer")
    group.add_argument('--momentum', type=float, default=0.984375,
                       help="Momentum coefficient.")
    group.add_argument('--beta1', type=float, default=0.9,
                       help="lamb/adam beta1 coefficient.")
    group.add_argument('--beta2', type=float, default=0.999,
                       help="lamb/adam beta2 coefficient.")
    group.add_argument('--weight-decay-rate', type=float, default=0.0,
                       help="Weight decay to use during optimisation.")
    group.add_argument('--epsilon', type=float, default=1e-4,
                       help="Optimiser epsilon value.")
    group.add_argument('--lr-schedule', default='exponential',
                       choices=["custom", "natural_exponential", "polynomial"],
                       help="Learning rate schedule function.")
    group.add_argument('--lr-schedule-by-step', type=str,
                       help="Dictionary of changes in the learning rate at specified steps.")
    group.add_argument('--warmup', default=0.1,
                       help="Learning rate schedule warm-up period, in epochs (float) or number of steps (integer).")
    group.add_argument('--seed', default=None,
                       help="Seed for randomizing training")
    group.add_argument('--wandb', action='store_true',
                       help="Enable logging and experiment tracking with Weights & Biases.")
    group.add_argument('--wandb-name', type=str, default=None,
                       help="Override Weights & Biases run name.")
    group.add_argument('--save-path', type=str, default="checkpoints",
                       help="Save checkpoints to this directory.")
    group.add_argument('--init-checkpoint', type=str, default=None,
                       help="Initialise a new training session from this checkpoint.")
    group.add_argument('--restore-dir', type=str, default=None,
                       help="Path to directory containing the checkpoint to restore.")
    group.add_argument('--restore-optimiser-from-checkpoint', default=True, action="store_true")
    group.add_argument('--save-optimiser-to-checkpoint', default=True, action="store_true")
    group.add_argument('--epochs', default=None, type=float,
                       help="Number of epochs to train for.")

    # BERT options
    group.add_argument('--vocab-size', type=int,
                       help="Vocabulary size of `inputs_ids` in `BertModel`.")
    group.add_argument('--hidden-size', type=int,
                       help="Size of the encoder layers and the pooler layer.")
    group.add_argument('--num-hidden-layers', type=int,
                       help="Number of hidden layers in the Transformer encoder.")
    group.add_argument('--num-attention-heads', type=int,
                       help="Number of attention heads for each attention layer in the Transformer encoder.")
    group.add_argument('--intermediate-size', type=int,
                       help='The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.')
    group.add_argument('--hidden-act', type=str,
                       help="The non-linear activation function (function or string) in the encoder and pooler.")
    group.add_argument('--hidden-dropout-prob', type=float,
                       help="The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.")
    group.add_argument('--attention-probs-dropout-prob', type=float,
                       help="The dropout ratio for the attention probabilities.")
    group.add_argument('--max-position-embeddings', type=int,
                       help="The maximum sequence length that this model might ever be used with. "
                            "Typically set this to something large just in case (e.g., 512 or 1024 or 2048).")
    group.add_argument('--type-vocab-size', type=int,
                       help="The vocabulary size of the `token_type_ids` passed into `BertModel`.")
    group.add_argument('--initializer-range', type=float,
                       help="The stdev of the truncated-normal-initializer for initializing all weight matrices.")
    group.add_argument('--disable-acc', default=False, action='store_true',
                       help="Skip the accuracy calculation.")
    group.add_argument('--use-qkv-bias', default=False, action='store_true',
                       help="Use biases for QKV layer.")
    group.add_argument('--use-qkv-split', default=False, action='store_true',
                       help="Split QKV layer.")

    # Model options
    group.add_argument('--use-attention-projection-bias', type=_str_to_bool, default=True,
                       help="Whether to use bias in linear projection behind attention layer.")
    group.add_argument('--use-cls-layer', type=_str_to_bool, default=True,
                       help="Include the CLS layer in pretraining. "
                            "This layer comes after the encoders but before the projection for the MLM loss.")
    group.add_argument('--use-prediction-bias', type=_str_to_bool, default=True,
                       help="Whether to use bias in MLM prediction.")
    group.add_argument('--static-mask', action='store_true', default=False,
                       help="Use if the pretraining dataset was created with the masked tokens always at the beginning of the input tensor.")
    group.add_argument('--max-predictions-per-seq', type=int, default=20,
                       help="The number of masked words per sentence.")
    group.add_argument('--duplicate-factor', default=5, type=int,
                       help="The duplication factor inside the dataset.")

    # GroupBert architecture options
    group.add_argument('--groupbert', action='store_true', default=False,
                       help="Use GroupBert architecture")
    group.add_argument('--groupbert-ffn-output-groups', type=int, default=4,
                       help="Set number of groups for FFN layer in GroupBert")
    group.add_argument('--groupbert-conv-kernel-size', type=int, default=7,
                       help="Set size of convolution kernel in GroupBert")
    group.add_argument('--groupbert-conv-group-size', type=int, default=16,
                       help="Set size of convolution groups in GroupBert")

    # IPU options
    pipeline_schedule_options = [_.name for _ in ipu.ops.pipelining_ops.PipelineSchedule]
    schedulers_available = [_.name for _ in ipu.config.SchedulingAlgorithm]
    recomputation_modes_available = [p.name for p in ipu.ops.pipelining_ops.RecomputationMode]

    group.add_argument('--gradient-accumulation-count', type=int, default=None,
                       help="Number of gradients to accumulate in the pipeline. Must also set --shards > 1.")
    group.add_argument('--pipeline-schedule', type=str, default="Interleaved",
                       choices=pipeline_schedule_options,
                       help="Pipelining scheduler.")
    group.add_argument('--replicas', type=int, default=1,
                       help="Replicate graph over N workers to increase batch to micro-batch-size*N")
    group.add_argument('--precision', type=str, default="16", choices=["16", "32"],
                       help="Precision of ops (weights/activations/gradients) data types: 16 or 32.")
    group.add_argument('--batches-per-step', type=int, default=1,
                       help="Maximum number of batches to perform on the device before returning to the host.")
    group.add_argument('--available-memory-proportion', type=str, default="0.23",
                       help="Proportion of IPU memory available to matmul operations. "
                            "A list can be used to specify the value for each IPU.")
    group.add_argument('--variable-offloading', type=_str_to_bool, default=True,
                       help="Enable offloading of training variables into remote memory.")
    group.add_argument('--min-remote-tensor-size', type=int, default=128,
                       help="The minimum remote tensor size (bytes) for partial variable offloading")
    group.add_argument('--replicated-tensor-sharding', action="store_true", default=False,
                       help="Enable replicated tensor sharding of optimizer state.")
    group.add_argument('--stochastic-rounding', type=_str_to_bool, default=True,
                       help="Enable stochastic rounding. Set to False when running evaluation.")
    group.add_argument('--no-outlining', type=_str_to_bool, default=False,
                       help="Disable TF outlining optimisations. This will increase memory for a small throughput improvement.")
    group.add_argument('--enable_recomputation', default=True, action="store_true",
                       help="Recompute activations during backward pass")
    group.add_argument('--fp-exceptions', default=False, action="store_true",
                       help="Enable floating-point exceptions.")
    group.add_argument('--partials-type', type=str, default="half", choices=["half", "float"],
                       help="Floating-point precision of data in matmul and convolution operations.")
    group.add_argument('--max-cross-replica-sum-buffer-size', type=int, default=10 * 1024 * 1024,
                       help="The maximum number of bytes that can be waiting before a cross replica sum op is scheduled.")
    group.add_argument('--max-reduce-scatter-buffer-size', type=int, default=0,
                       help="The maximum number of bytes that can be waiting before a reduce-scatter op is scheduled.")
    group.add_argument('--scheduler', type=str, default="CLUSTERING", choices=schedulers_available,
                       help="Forces the compiler to use a specific scheduler when ordering the instructions.")
    group.add_argument('--recomputation-mode', type=str,
                       default="RecomputeAndBackpropagateInterleaved",
                       choices=recomputation_modes_available)
    group.add_argument('--increase-optimiser-precision', action='store_true', default=False,
                       help="Perform more of the LAMB optimiser's operations in fp32. "
                            "This increases precision in the weight update but consumes more memory and reduces throughput.")
    group.add_argument('--use-nvlamb', action='store_true', default=False,
                       help="Flag to use the global normalisation for the gradients.")
    group.add_argument('--use-debiasing', action='store_true', default=False,
                       help="Flag to use debiasing for the momenta of LAMB.")
    group.add_argument('--reduction-type', type=str, choices=['sum', 'mean'], default='mean',
                       help="The reduction type applied to the pipeline: summation or mean.")
    group.add_argument('--weight-norm-clip', type=float, default=0.,
                       help="The value at which to clip the weight norm; 0 means no weight clipping.")
    group.add_argument('--compile-only', action="store_true", default=False,
                       help="Configure Poplar to only compile the graph. This will not acquire any IPUs "
                            "and thus facilitates profiling without using hardware resources.")
    group.add_argument('--matmul-serialize-factor', type=int, default=6,
                       help="Serialization factor of the embeddings lookup and projection. Must be a divisor of vocab_size.")
    group.add_argument('--glue-dropout-prob', type=float, default=0.1,
                       help="GLUE tasks dropout probability.")
    group.add_argument('--pipeline-stages', type=str,
                       help="Pipeline stages, a list of [emb, pos, hid, mlm, nsp] layers forming the pipeline.")
    group.add_argument('--device-mapping', type=str,
                       help="Mapping of pipeline stages to IPUs")
    group.add_argument('--sync-replicas-independently', action='store_true', default=False,
                       help="All the replicas will be in sync.")
    group.add_argument('--log-all-workers', action='store_true',
                       help="Allow all workers to log to the terminal and the log files.")

    # Dataset options
    group.add_argument('--train-file', type=str, required=False,
                       help="Path to the wiki/corpus training dataset TFRecord file.")
    group.add_argument('--seq-length', type=int, default=128,
                       help="The maximum sequence length.")
    group.add_argument('--parallel-io-threads', type=int, default=4,
                       help="Number of CPU threads used for data prefetch.")
    group.add_argument('--generated-data', action="store_true", default=False,
                       help="Generate synthetic data on the host and use it for training.")
    group.add_argument('--synthetic-data', action='store_true',
                       help="Run the model completely detached from the host.")
    group.add_argument('--dataset-repeat', type=int, default=1,
                       help="Number of times to repeat the dataset.")

    # Env flag specific arguments
    group.add_argument('--execution-profile', action='store_true',
                       help="Sets the Poplar engine options to output an execution profile to the profile-dir.")
    group.add_argument('--memory-profile', action='store_true',
                       help="Sets the Poplar engine options to output a memory profile to the profile-dir.")
    group.add_argument('--profile-dir', type=str, default='./',
                       help="Directory where the profile will be written.")
    group.add_argument('--progress-bar', type=str, choices=['auto', 'true', 'false'], default='auto',
                       help="Whether to show the compilation progress bar. Pass false to disable it.")
    # fmt:on

    # Add logging-specific arguments
    log.add_arguments(parser)
    return parser
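# Usage sketch (assumption: a fresh ArgumentParser is passed in, as the
# create_parser helpers above do). The flag values are examples only.
#
#     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
#     parser = add_common_arguments(parser)
#     args = parser.parse_args(["--micro-batch-size", "2",
#                               "--num-train-steps", "1000",
#                               "--optimizer", "lamb"])
#     assert args.optimizer == "lamb"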
default_output_filename = os.path.join(cwd, "legit_trace_%d.txt")
parser = argparse.ArgumentParser(description="Creates trace files for cross-validation training of Beer Garden. "
    "Outputs several files with filename template given by OUTPUT_FILENAME (default = %s)." % default_output_filename)
parser.add_argument("-p", "--pages", type=str, default=default_pages_filename,
    help="Default=%(default)s. The JSON file containing output from get_pages.py")
parser.add_argument("-d", "--diff", type=float, default=0.0,
    help="Default=%(default)f. The proportion of URL accesses that should be diffs")
parser.add_argument("-n", "--num-urls", type=int, default=100,
    help="Default=%(default)d. The number of URLs to generate")
parser.add_argument("-r", "--partitions", type=int, default=2,
    help="Default=%(default)d. The number of partitions to produce for cross validation.")
parser.add_argument("-o", "--output_filename", type=str, default=default_output_filename,
    help="Default=%(default)s. The filename template for output files (must contain %%d).")
log.add_arguments(parser)
args = parser.parse_args()
logger = log.getLogger(args)
logger.info("Command line arguments: %s" % str(args))

args.output_filename = os.path.realpath(args.output_filename)
try:
    test_filename = args.output_filename % 1
except TypeError:
    logger.critical("output filename must contain exactly one %d")
    sys.exit(1)

try:
    with open(args.pages, "r") as f:
        pass
except IOError:
    # The original snippet is truncated here; this handler follows the same
    # pattern as the config-file check in main() above.
    logger.critical("Error: could not open pages file (%s)" % args.pages)
    sys.exit(1)
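# Side note: the TypeError check above relies on old-style %-formatting. A
# template that consumes exactly one argument formats cleanly; a template with
# zero or with two-or-more placeholders raises TypeError. Illustration only:
#
#     "trace_%d.txt" % 1        # OK -> "trace_1.txt"
#     "trace.txt" % 1           # TypeError: not all arguments converted
#     "trace_%d_%d.txt" % 1     # TypeError: not enough arguments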
def run_client(name, desc, default_concurrent_puzzles, default_timeout):
    desc += " WARNING: this script does not have an off switch. You must forcefully kill it with something like " + \
        "pkill -f 'python.*%s'" % name
    cwd = os.getcwd()
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("-s", "--server", type=str, required=True,
        help="the domain part of the URL to submit requests to; e.g. 'localhost'")
    parser.add_argument("-u", "--url", type=str, default=None,
        help="Default=%(default)s. the rest of the url (not including SERVER); e.g. '/index.php'")
    parser.add_argument("-f", "--url-file", type=str, default=None,
        help="Default=%(default)s. file containing urls, one per line (not including SERVER); e.g. '/index.php'")
    parser.add_argument("-to", "--timeout", type=float, default=default_timeout,
        help="Default=%(default)s. Connections timeout after TIMEOUT seconds.")
    parser.add_argument("-r", "--regex", type=str, required=True,
        help="regular expression that positively matches the target web-app page, NOT the puzzle page, "
             "and NOT 403 pages or anything else; e.g. MediaWiki")
    parser.add_argument("-e", "--rate", type=float, default=10,
        help="Default=%(default)s. Number of requests per second")
    parser.add_argument("-d", "--duration", type=int, default=5,
        help="Default=%(default)s. Duration of trial in seconds.")
    parser.add_argument("-z", "--concurrent-puzzles", type=int, default=default_concurrent_puzzles,
        help="Default=%(default)s. The maximum number of clients allowed to work on puzzles at the same time. "
             "If CONCURRENT_PUZZLES <= 0, then CONCURRENT_PUZZLES will be set to infinity.")
    parser.add_argument("-i", "--id", type=str, default="1",
        help="Default=%(default)s. An id to identify this process in the logs")
    parser.add_argument("-y", "--history", type=int, default=20,
        help="Default=%(default)s. When displaying averages, only use the last HISTORY measurements.")
    parser.add_argument("-p", "--poisson", action="store_true", default=False,
        help="Set this flag to issue requests as a Poisson process. Otherwise, issue requests at fixed intervals.")
    parser.add_argument("-a", "--trace-filename", type=str, default=name + ".csv",
        help="Default=%(default)s. Name of output tracefile. The output tracefile is a CSV with one row per event. "
             "Each row has 4 fields: (1) status, (2) event, (3) latency, (4) num_bits. "
             "For further explanation, see the source.")
    log.add_arguments(parser)
    args = parser.parse_args()
    logger = log.getLogger(args, name="%s.%s" % (name, args.id))
    logger.info("Command line arguments: %s" % str(args))

    queue = Queue.Queue()

    if args.url_file:
        urls = []
        with open(args.url_file) as f:
            for line in f:
                urls.append(line.strip())
    elif args.url:
        urls = [args.url]
    else:
        logger.error("Missing --url or --url-file arguments")
        sys.exit(1)
    logger.info("urls = %s ...", urls[:10])

    monitor = Monitor.spawn(logger, queue, args.history, args.trace_filename)
    jobs = []
    period = 1.0 / args.rate
    requests = int(args.rate * args.duration) + 1

    # the total amount of time this greenlet should have spent sleeping
    expected_duration = 0.0
    puzzles_being_solved = PuzzlesBeingSolved()
    start_time = time.time()
    num_urls = len(urls)

    for i in range(0, requests):
        url = urls[i % num_urls]
        job = ClientGreenlet.spawn(logger, queue, args.server, url, args.timeout + 10, \
            args.regex, i + 1, args.concurrent_puzzles, puzzles_being_solved)
        jobs.append(job)
        if args.poisson:
            sleep_time = random.expovariate(1.0 / period)
        else:
            sleep_time = period
        actual_duration = time.time() - start_time
        # If you overslept last time, then reduce your sleep time now in order to catch up
        overslept = max(0, actual_duration - expected_duration)
        requested_sleep_time = max(0, sleep_time - overslept)
        expected_duration += sleep_time
        logger.debug("sleeping for %f sec before next request", requested_sleep_time)
        if requested_sleep_time > 0.0:
            gevent.sleep(requested_sleep_time)

    # if actual_duration is significantly longer than expected_duration, then this
    # process is too CPU bound and needs to slow down the request rate
    actual_duration = time.time() - start_time
    if actual_duration > (expected_duration * 1.05):
        logger.error("Actual duration (%f) significantly longer than expected duration (%f). Could not send requests " \
            "fast enough" % (actual_duration, expected_duration))
    else:
        logger.debug("Actual duration (%f) NOT significantly longer than expected duration (%f). Sent requests " \
            "fast enough" % (actual_duration, expected_duration))

    # could not get joinall to work
    for i in range(0, int(args.timeout) + 1):
        # get rid of all jobs that have finished
        jobs = filter(lambda j: not j.ready(), jobs)
        if len(jobs) == 0:
            break
        gevent.sleep(1)

    jobs = filter(lambda j: not j.ready(), jobs)
    if len(jobs) > 0:
        for job in jobs:
            job.kill(block=True, timeout=1)

    monitor.kill()
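# Side note: a minimal, self-contained sketch of the catch-up pacing used in
# the request loop above (paced_calls and its parameters are illustrative, not
# part of this module). Each iteration sleeps for the nominal period minus any
# accumulated oversleep, so the long-run call rate stays close to the target
# even when individual sleeps run long.
#
#     import time
#
#     def paced_calls(fn, rate, count):
#         period = 1.0 / rate
#         expected = 0.0          # cumulative time we planned to have slept
#         start = time.time()
#         for _ in range(count):
#             fn()
#             elapsed = time.time() - start
#             overslept = max(0.0, elapsed - expected)
#             expected += period
#             time.sleep(max(0.0, period - overslept))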
if __name__ == "__main__":
    cwd = os.getcwd()
    parser = argparse.ArgumentParser(description='Solves Doorman puzzles. Reads the puzzle from stdin. Example usage: '
        'curl -s "http://localhost/index.php" | ./puzzle_solver.py | xargs -I REQ curl -s "http://localhostREQ"')
    parser.add_argument("-s", "--sleep-time", type=float, default=None,
        help="If specified, overrides the puzzle's sleep time (num secs to sleep between bursts)")
    parser.add_argument("-b", "--burst_len", type=int, default=None,
        help="If specified, overrides the puzzle's burst_len")
    log.add_arguments(parser, "CRITICAL", "CRITICAL")
    args = parser.parse_args()
    logger = log.getLogger(args)
    logger.info("Command line arguments: %s" % str(args))

    solver = PuzzleSolver(logger, sys.stdin.read())
    solver.sleep_time = args.sleep_time or solver.sleep_time
    solver.burst_len = args.burst_len or solver.burst_len
    print solver.solve()