def get_popdist_device(args, request_ipus):
    """Acquire the IPU device for this popdist instance.

    Derives the number of IPUs per replica from the total request and the
    replication factor, validates it against the popdist environment, then
    resolves this instance's device id and delegates to ``get_device_by_id``.

    Args:
        args: Parsed options. Reads ``args.replication_factor`` and writes
            ``args.device_id`` as a side effect.
        request_ipus: Total number of IPUs requested for the model.

    Returns:
        Whatever ``get_device_by_id(args, request_ipus)`` returns.

    Raises:
        RuntimeError: If the computed IPUs-per-replica does not match the
            value configured in the popdist environment.
    """
    ipus_per_replica = request_ipus // args.replication_factor
    if not popdist.checkNumIpusPerReplica(ipus_per_replica):
        # Fixed grammar in the error message ("number IPUs" -> "number of IPUs").
        raise RuntimeError(
            f"The number of IPUs per replica ({ipus_per_replica}) required for the model configuration"
            f" does not match the specified popdist IPUs per replica ({popdist.getNumIpusPerReplica()})")
    # popdist needs the per-replica IPU count to select this instance's device.
    args.device_id = popdist.getDeviceId(ipus_per_replica)
    return get_device_by_id(args, request_ipus)
if __name__ == '__main__':
    # Keep TensorFlow quiet except for real errors.
    tf.logging.set_verbosity(tf.logging.ERROR)

    opts = make_global_options([add_pretraining_options])
    # Shard count is the next power of two covering the highest mapped device.
    opts['shards'] = ipu_utils.next_power_of_two(max(opts["device_mapping"]) + 1)

    if popdist.isPopdistEnvSet():
        # Launched under poprun: replica layout comes from the popdist env.
        opts['use_popdist'] = True
        opts['replicas'] = popdist.getNumLocalReplicas()
        opts['total_replicas'] = popdist.getNumTotalReplicas()
        # Compile-only runs do not attach to a physical device.
        opts['select_ipu'] = None if opts['compile_only'] else popdist.getDeviceId()
    else:
        opts['use_popdist'] = False
        opts['total_replicas'] = opts['replicas']
        opts['select_ipu'] = None

    set_defaults(opts)

    set_poplar_engine_options(
        execution_profile=opts['execution_profile'],
        memory_profile=opts['memory_profile'],
        profile_dir=str(opts['profile_dir']),
        sync_replicas_independently=opts['replicas'] > 1 and opts['sync_replicas_independently'],
        synthetic_data=opts['synthetic_data'],
        tensorflow_progress_bar=opts['progress_bar'])
type=str, default="./ckpt_init/yolov3_coco_converted.fp16.ckpt", help="ckpt init weight") arguments = parser.parse_args() with open(arguments.config) as f: opts = json.load(f) opts['train']['annot_path'] = arguments.train_path opts['train']['initial_weight'] = arguments.init_weight opts['test']['annot_path'] = arguments.test_path if popdist.isPopdistEnvSet(): opts["use_popdist"] = True opts["train"]["replicas"] = popdist.getNumLocalReplicas() opts["train"]["total_replicas"] = popdist.getNumTotalReplicas() opts["select_ipu"] = popdist.getDeviceId( len(opts["train"]["device_mapping"])) opts["distributed_worker_count"] = int(popdist.getNumTotalReplicas() / popdist.getNumLocalReplicas()) opts["distributed_worker_index"] = int( popdist.getReplicaIndexOffset() / popdist.getNumLocalReplicas()) opts["use_popdist"] = True else: opts["use_popdist"] = False opts["train"]["total_replicas"] = opts["train"]["replicas"] opts["select_ipu"] = -1 opts["distributed_worker_count"] = 1 opts["distributed_worker_index"] = 0 opts["use_popdist"] = False # for each instance will have difference seed, so data will be shuffled differently
amps = opts['available_memory_proportion']
if amps and len(amps) > 1:
    # Multiple AMP values only make sense with pipelining, and then exactly
    # two values (forward/backward) per shard must be supplied.
    if not opts['pipeline']:
        raise ValueError(
            '--available-memory-proportion should only have one value unless using pipelining'
        )
    if len(amps) != 2 * int(opts['shards']):
        raise ValueError(
            '--available-memory-proportion should have either one value or 2*shards values specified'
        )

if popdist.isPopdistEnvSet():
    # Launched under poprun: replica layout comes from the popdist env.
    opts['use_popdist'] = True
    opts['replicas'] = popdist.getNumLocalReplicas()
    opts['total_replicas'] = popdist.getNumTotalReplicas()
    opts['select_ipu'] = str(popdist.getDeviceId(opts['shards']))
else:
    opts['use_popdist'] = False
    opts['total_replicas'] = opts['replicas']

# Record the exact command line for reproducibility.
opts["command"] = ' '.join(sys.argv)
set_defaults(model, lr_schedule, opts)

if opts['dataset'] == 'imagenet':
    if opts['image_size'] is None:
        opts['image_size'] = 224
    size = opts['image_size']
    # Non-default resolutions are reflected in the run name.
    if size != 224:
        opts['name'] += f'_{size}x{size}'
    opts['summary_str'] += f"Image Size: {size}x{size}\n"