def __init__(self, *args, pybase_logger_name=None, **kwargs):
    super().__init__(*args, **kwargs)

    if pybase_logger_name is None:
        pybase_logger_name = self.__class__.__name__

    self._pybase_logger_name = pybase_logger_name
    self._valid = True

    self._log = get_logger(self._pybase_get_logger_name())
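For context, a minimal self-contained sketch of how an `__init__` like this is typically used as a base/mixin. The `Base` host class, the `get_logger` stand-in, and the `_pybase_get_logger_name` accessor shown here are assumptions, not part of the snippet above:

import logging


def get_logger(name):
    # Assumed stand-in for the project's get_logger helper
    return logging.getLogger(name)


class Base:
    # Hypothetical host class for the __init__ shown above
    def __init__(self, *args, pybase_logger_name=None, **kwargs):
        super().__init__(*args, **kwargs)
        if pybase_logger_name is None:
            pybase_logger_name = self.__class__.__name__
        self._pybase_logger_name = pybase_logger_name
        self._valid = True
        self._log = get_logger(self._pybase_get_logger_name())

    def _pybase_get_logger_name(self):
        # Assumed accessor; the real implementation may differ
        return self._pybase_logger_name


class Trainer(Base):
    def run(self):
        self._log.info("Running ...")  # Logged under the "Trainer" name


logging.basicConfig(level=logging.INFO)
Trainer().run()
Trainer(pybase_logger_name="custom.name").run()  # Explicit logger name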
def chat_with(bot, user_name="You", logger=None):
    if logger is None:
        logger = get_logger("Bot")

    while True:
        logger.info(f'{user_name} :')

        # Ask for input
        user_input = input()

        sys.stdout.write('\n')
        sys.stdout.flush()

        response = bot.respond_to(user_input)
        if response["system"] == "quit":
            break
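A hedged usage sketch for this loop. The `EchoBot` class is hypothetical; any object exposing a `respond_to` method that returns a dict with a "system" key would work:

class EchoBot:
    # Hypothetical bot: echoes the input, signals "quit" on the word "quit"
    def respond_to(self, user_input):
        if user_input.strip().lower() == "quit":
            return {"system": "quit"}
        print(f"Bot: {user_input}")
        return {"system": "ok"}


chat_with(EchoBot(), user_name="Alice")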
def worker_fn(rank, args, world_size):
    distributed = args.distributed
    is_primary = rank == 0

    mlp.logging.use_fancy_colors()

    # ########### EXPERIMENT SETUP ############
    torch.random.manual_seed(args.seed)  # For reproducibility

    if distributed:
        logger_name = f"[Device {rank}] {os.path.basename(__file__)}"
    else:
        logger_name = os.path.basename(__file__)

    logger = get_logger(logger_name)
    # ########################################

    # ############## DEVICE SETUP ##############
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(rank)

        if distributed:
            os.environ['MASTER_ADDR'] = 'localhost'
            os.environ['MASTER_PORT'] = '12355'

            dist.init_process_group(backend='nccl',
                                    rank=rank,
                                    world_size=world_size)

            logger.info(f"Training using multiple GPUs: Using GPU {rank}/{world_size}")
        else:
            logger.info(f"Single device mode: Using GPU {rank}")
    else:
        if distributed:
            logger.error("No GPUs available for distributed training over multiple devices")
            return

        logger.info("Single device mode: Using CPU")

    device = torch.device("cuda" if use_cuda else "cpu")
    # ########################################

    # ########## SETUP BATCH DATASETS ##########
    # Let the primary process download/prepare the data first; the other
    # processes wait at the barrier and then load the cached data.
    if distributed and not is_primary:
        dist.barrier()

    training_data, test_data = load_data()

    if distributed and is_primary:
        dist.barrier()

    training_sampler = None
    validation_sampler = None
    if distributed:
        training_sampler = torch.utils.data.distributed.DistributedSampler(training_data)
        validation_sampler = torch.utils.data.distributed.DistributedSampler(test_data)

    training_dataset = torch.utils.data.DataLoader(training_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=(training_sampler is None),
                                                   sampler=training_sampler,
                                                   num_workers=3)

    # Using the test set as a validation set, just for demonstration purposes
    validation_dataset = torch.utils.data.DataLoader(test_data,
                                                     batch_size=args.batch_size,
                                                     shuffle=(validation_sampler is None),
                                                     sampler=validation_sampler,
                                                     num_workers=3)
    # ##########################################

    # ############ BUILD THE MODEL #############
    classifier = build_model(args.hidden_size)
    train_model = TrainModel(classifier, device)

    # Move the model to the assigned GPU (see torch.cuda.set_device(rank) above)
    classifier.to(device)

    if distributed:
        train_model = DDP(train_model, device_ids=[rank])
    # ############################################

    # ############ SETUP OPTIMIZER #############
    optimizer = torch.optim.Adam(classifier.parameters(), lr=args.learning_rate)
    # ##########################################

    # ############# SETUP TRAINING ##############
    trainer = mlp.trainers.DefaultTrainer(optimizers=optimizer,
                                          model_components=classifier)

    model_hyper_parameters = {"hidden_size": args.hidden_size}

    callbacks = create_callbacks_for(trainer,
                                     args.experiment_name,
                                     model_hyper_parameters,
                                     is_primary,
                                     validation_dataset,
                                     args.progress_log_period)

    manager = mlp.trainers.TrainingManager(trainer,
                                           training_dataset,
                                           num_epochs=args.num_epochs,
                                           callbacks=callbacks,
                                           experiment_data={"args": args})

    trainer.set_training_model(train_model)
    # ##########################################

    # ################# START! #################
    manager.start_training()
    # ##########################################

    logger.info("DONE.")
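One thing the worker above does not do is tear down the process group it initializes. A minimal sketch of the cleanup one would typically append at the end of worker_fn (an assumption on my part, not part of the original file):

# Sketch: typical cleanup at the end of worker_fn when distributed is True
if distributed:
    dist.barrier()                # Make sure all ranks have finished training
    dist.destroy_process_group()  # Release NCCL/process-group resources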
classifier.eval()

with torch.no_grad():
    logits = classifier(image)
    probabilities = torch.softmax(logits, dim=-1)
    predicted_label = torch.argmax(probabilities)

logger.info(f"real label = {real_label}, predicted label = {predicted_label}\n")


if __name__ == '__main__':
    # ############# SETUP LOGGING #############
    mlp.logging.use_fancy_colors()
    logger = get_logger(os.path.basename(__file__))
    # ########################################

    # ############## PARSE ARGS ##############
    parser = base_argument_set()
    args = parser.parse_args()

    describe_args(args, logger)

    # ############## TRAIN MODEL ##############
    if args.distributed:
        num_gpus_available = torch.cuda.device_count()
        world_size = args.num_devices if args.num_devices > 0 else num_gpus_available
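The snippet stops right after computing world_size. For reference, a hedged sketch of how a worker like worker_fn(rank, args, world_size) is commonly launched from this point; this is an assumption about the continuation, not the file's actual code:

import torch.multiprocessing as mp

# Sketch: start one process per GPU; mp.spawn calls worker_fn(rank, args, world_size)
# with the process index as the first argument
mp.spawn(worker_fn,
         args=(args, world_size),
         nprocs=world_size,
         join=True)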
parser.add_argument('--feed-forward-layer-size',
                    type=int,
                    required=False,
                    default=3072,
                    help='Element-wise feed forward layer size')

args = parser.parse_args()

if args.remote_debug:
    import pydevd_pycharm
    pydevd_pycharm.settrace('192.168.178.85',
                            port=57491,
                            stdoutToServer=True,
                            stderrToServer=True)

mlp.logging.use_fancy_colors()

logger_name = os.path.basename(__file__)
logger = get_logger(logger_name)

# TODO : seed
# seed = args.seed
# logger.info(f"Seed : {seed}")
# np.random.seed(args.seed)

use_mixed_precision = args.float16

##################################################
#
# [START] Setup
#
##################################################

# ############ Conversations dataset #############
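The seeding TODO above is left as-is. For reference, a minimal sketch of how full seeding is commonly done, assuming numpy and the standard random module are acceptable dependencies (the seed_everything name is my own, not from this file):

import random

import numpy as np
import torch


def seed_everything(seed):
    # Seed all common sources of randomness for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)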
def worker_fn(rank, flags):
    args = flags['args']
    world_size = flags['world_size']

    distributed = args.distributed
    is_primary = rank == 0

    mlp.logging.use_fancy_colors()

    # ########## EXPERIMENT SETUP ###########
    torch.random.manual_seed(args.seed)  # For reproducibility

    if distributed:
        logger_name = f"[Device {rank}] {os.path.basename(__file__)}"
    else:
        logger_name = os.path.basename(__file__)

    logger = get_logger(logger_name)
    # ########################################

    # ############## DEVICE SETUP ##############
    xla_available = len(xm.get_xla_supported_devices()) > 0
    if not xla_available:
        logger.error("No XLA devices available, unable to train")
        return

    if distributed:
        logger.info(f"Training using multiple XLA devices: Using XLA device {rank}/{world_size}")
    else:
        logger.info(f"Single XLA device mode: Using XLA device {rank}")

    device = xm.xla_device()
    # ########################################

    # ########## SETUP BATCH DATASETS ##########
    # Let the primary process download/prepare the data first; the other
    # processes wait at the rendezvous point and then load the cached data.
    if distributed and not is_primary:
        xm.rendezvous("loading_data")

    training_data, test_data = load_data()

    if distributed and is_primary:
        xm.rendezvous("loading_data")

    training_sampler = None
    validation_sampler = None
    if distributed:
        training_sampler = torch.utils.data.distributed.DistributedSampler(training_data,
                                                                           num_replicas=world_size,
                                                                           rank=rank)
        validation_sampler = torch.utils.data.distributed.DistributedSampler(test_data,
                                                                             num_replicas=world_size,
                                                                             rank=rank)

    training_dataset = torch.utils.data.DataLoader(training_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=(training_sampler is None),
                                                   sampler=training_sampler,
                                                   num_workers=3)

    # Using the test set as a validation set, just for demonstration purposes
    validation_dataset = torch.utils.data.DataLoader(test_data,
                                                     batch_size=args.batch_size,
                                                     shuffle=(validation_sampler is None),
                                                     sampler=validation_sampler,
                                                     num_workers=3)
    # ##########################################

    # ############ BUILD THE MODEL #############
    classifier = build_model(args.hidden_size)
    train_model = TrainModel(classifier, device)

    # Move the model to the assigned XLA device
    classifier.to(device)
    # ############################################

    # ############ SETUP OPTIMIZER #############
    optimizer = torch.optim.Adam(classifier.parameters(), lr=args.learning_rate)
    # ##########################################

    # ############# SETUP TRAINING ##############
    trainer = mlp.trainers.DefaultTrainer(optimizers=optimizer,
                                          model_components=classifier)

    model_hyper_parameters = {"hidden_size": args.hidden_size}

    callbacks = create_callbacks_for(trainer,
                                     args.experiment_name,
                                     model_hyper_parameters,
                                     is_primary,
                                     validation_dataset,
                                     args.progress_log_period)

    manager = mlp.trainers.TrainingManager(trainer,
                                           training_dataset,
                                           num_epochs=args.num_epochs,
                                           callbacks=callbacks,
                                           experiment_data={"args": args})

    trainer.set_training_model(train_model)
    # ##########################################

    # ################# START! #################
    manager.start_training()
    # ##########################################

    logger.info("DONE.")
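The surrounding __main__ block is not shown in this snippet. A hedged sketch of how an XLA worker with this (rank, flags) signature is typically launched; xmp.spawn passes the process index as the first argument, and the flags dict here mirrors the keys the worker reads:

import torch_xla.distributed.xla_multiprocessing as xmp

flags = {'args': args, 'world_size': world_size}

# Sketch: start one process per XLA device; each calls worker_fn(rank, flags)
xmp.spawn(worker_fn, args=(flags,), nprocs=world_size)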