Example #1
    def __init__(self, *args, pybase_logger_name=None, **kwargs):
        super().__init__(*args, **kwargs)

        if pybase_logger_name is None:
            pybase_logger_name = self.__class__.__name__

        self._pybase_logger_name = pybase_logger_name

        self._valid = True
        self._log = get_logger(self._pybase_get_logger_name())
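The excerpt above is from a mixin-style base class that names its logger after the concrete subclass when no explicit name is given. A minimal, self-contained sketch of that pattern follows; LoggerMixin, _logger_name and the logging-based get_logger helper are illustrative stand-ins, not the original pybase API.

import logging


def get_logger(name):
    # Stand-in for the project's get_logger helper (assumption).
    return logging.getLogger(name)


class LoggerMixin:
    """Each instance gets a logger named after its own class by default."""

    def __init__(self, *args, logger_name=None, **kwargs):
        super().__init__(*args, **kwargs)

        if logger_name is None:
            logger_name = self.__class__.__name__

        self._logger_name = logger_name
        self._log = get_logger(self._logger_name)


class Trainer(LoggerMixin):
    def run(self):
        self._log.info("running")  # logged under the name "Trainer"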
Example #2
def chat_with(bot, user_name="You", logger=None):
    if logger is None:
        logger = get_logger("Bot")

    while True:
        logger.info(f'{user_name} :')
        # Ask for user input
        user_input = input()
        sys.stdout.write('\n')
        sys.stdout.flush()

        response = bot.respond_to(user_input)

        if response["system"] == "quit":
            break
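chat_with only assumes a bot object with a respond_to(text) method that returns a dict containing a "system" key; the loop exits when that key equals "quit". A toy bot satisfying that contract is sketched below; EchoBot is hypothetical and not part of the original code.

class EchoBot:
    """Toy bot (hypothetical): echoes the input and quits on 'bye'."""

    def respond_to(self, text):
        if text.strip().lower() == "bye":
            return {"system": "quit"}
        print(f"Bot : {text}")
        return {"system": "ok"}


# chat_with(EchoBot())  # prompts "You :" in a loop until the user types 'bye'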
Example #3
def worker_fn(rank, args, world_size):

    distributed = args.distributed
    is_primary = rank == 0

    mlp.logging.use_fancy_colors()

    # ########### EXPERIMENT SETUP ############
    torch.random.manual_seed(args.seed)  # For reproducibility

    if distributed:
        logger_name = f"[Device {rank}] {os.path.basename(__file__)}"
    else:
        logger_name = os.path.basename(__file__)

    logger = get_logger(logger_name)
    # ########################################

    # ############## DEVICE SETUP ##############
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(rank)

        if distributed:
            os.environ['MASTER_ADDR'] = 'localhost'
            os.environ['MASTER_PORT'] = '12355'

            dist.init_process_group(backend='nccl',
                                    rank=rank,
                                    world_size=world_size)

            logger.info(
                f"Training using multiple GPUs: Using GPU {rank}/{world_size}")
        else:
            logger.info(f"Single device mode : Using GPU {rank} ")
    else:
        if distributed:
            logger.error(
                f"No GPUs available for data distributed training over multiple GPUs"
            )
            return

        logger.info(f"Single device mode : Using CPU")

    device = torch.device("cuda" if use_cuda else "cpu")
    # ########################################

    # ########## SETUP BATCH DATASETS ##########
    if distributed and not is_primary:
        dist.barrier()

    training_data, test_data = load_data()

    if distributed and is_primary:
        dist.barrier()

    training_sampler = None
    validation_sampler = None
    if distributed:
        training_sampler = torch.utils.data.distributed.DistributedSampler(
            training_data)
        validation_sampler = torch.utils.data.distributed.DistributedSampler(
            test_data)

    training_dataset = torch.utils.data.DataLoader(
        training_data,
        batch_size=args.batch_size,
        shuffle=(training_sampler is None),
        sampler=training_sampler,
        num_workers=3)

    # Using the test set as a validation set, just for demonstration purposes
    validation_dataset = torch.utils.data.DataLoader(
        test_data,
        batch_size=args.batch_size,
        shuffle=(validation_sampler is None),
        sampler=validation_sampler,
        num_workers=3)
    # ##########################################

    # ############ BUILD THE MODEL #############
    classifier = build_model(args.hidden_size)

    train_model = TrainModel(classifier, device)

    # Move the model to the GPU assigned to this process (see torch.cuda.set_device(rank) above)
    classifier.to(device)
    if distributed:
        train_model = DDP(train_model, device_ids=[rank])
    # ############################################

    # ############ SETUP OPTIMIZER #############
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=args.learning_rate)
    # ##########################################

    # ############# SETUP TRAINING ##############
    trainer = mlp.trainers.DefaultTrainer(optimizers=optimizer,
                                          model_components=classifier)

    model_hyper_parameters = {"hidden_size": args.hidden_size}

    callbacks = create_callbacks_for(trainer, args.experiment_name,
                                     model_hyper_parameters, is_primary,
                                     validation_dataset,
                                     args.progress_log_period)

    manager = mlp.trainers.TrainingManager(trainer,
                                           training_dataset,
                                           num_epochs=args.num_epochs,
                                           callbacks=callbacks,
                                           experiment_data={"args": args})

    trainer.set_training_model(train_model)
    # ##########################################

    # ################# START! #################
    manager.start_training()
    # ##########################################

    logger.info("DONE.")
Example #4
    classifier.eval()
    with torch.no_grad():
        logits = classifier(image)
        probabilities = torch.softmax(logits, dim=-1)

        predicted_label = torch.argmax(probabilities)

        logger.info(
            f"real label = {real_label}, predicted label = {predicted_label}\n"
        )


if __name__ == '__main__':
    # ############# SETUP LOGGING #############
    mlp.logging.use_fancy_colors()
    logger = get_logger(os.path.basename(__file__))
    # ########################################

    # ############## PARSE ARGS ##############
    parser = base_argument_set()

    args = parser.parse_args()

    describe_args(args, logger)

    # ############## TRAIN MODEL ##############
    if args.distributed:
        num_gpus_available = torch.cuda.device_count()
        world_size = args.num_devices if args.num_devices > 0 else num_gpus_available
Example #5
    parser.add_argument(
        '--feed-forward-layer-size',
        type=int, required=False, default=3072,
        help='Element-wise feed forward layer size')

    args = parser.parse_args()

    if args.remote_debug:
        import pydevd_pycharm
        pydevd_pycharm.settrace('192.168.178.85', port=57491, stdoutToServer=True, stderrToServer=True)

    mlp.logging.use_fancy_colors()

    logger_name = os.path.basename(__file__)
    logger = get_logger(logger_name)

    # TODO : seed
    # seed = args.seed
    # logger.info(f"Seed : {seed}")
    # np.random.seed(args.seed)

    use_mixed_precision = args.float16

    ##################################################
    #
    # [START] Setup
    #
    ##################################################

    # ############ Conversations dataset #############
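The snippet reads args.remote_debug and args.float16 in addition to the --feed-forward-layer-size option it defines itself. A minimal parser declaring all three, so the excerpt runs standalone, could look like the sketch below; the flag names match the attributes used above, but the defaults and help texts are assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--feed-forward-layer-size',
    type=int, required=False, default=3072,
    help='Element-wise feed forward layer size')
parser.add_argument(
    '--remote-debug', action='store_true',
    help='Attach to a remote PyCharm debugger via pydevd_pycharm')
parser.add_argument(
    '--float16', action='store_true',
    help='Use mixed precision (float16) training')

args = parser.parse_args()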
Example #6
def worker_fn(rank, flags):
    args = flags['args']
    world_size = flags['world_size']

    distributed = args.distributed
    is_primary = rank == 0

    mlp.logging.use_fancy_colors()

    # ########## EXPERIMENT SETUP  ###########
    torch.random.manual_seed(args.seed)  # For reproducibility

    if distributed:
        logger_name = f"[Device {rank}] {os.path.basename(__file__)}"
    else:
        logger_name = os.path.basename(__file__)

    logger = get_logger(logger_name)
    # ########################################

    # ############## DEVICE SETUP ##############
    xla_available = len(xm.get_xla_supported_devices()) > 0
    if not xla_available:
        logger.error("No XLA devices available, unable to train")
        return

    if distributed:
        logger.info(
            f"Training using multiple XLA devices: Using XLA device {rank}/{world_size}"
        )
    else:
        logger.info(f"Single XLA device mode : Using XLA device {rank} ")

    device = xm.xla_device()
    # ########################################

    # ########## SETUP BATCH DATASETS ##########
    if distributed and not is_primary:
        xm.rendezvous("loading_data")

    training_data, test_data = load_data()

    if distributed and is_primary:
        xm.rendezvous("loading_data")

    training_sampler = None
    validation_sampler = None
    if distributed:
        training_sampler = torch.utils.data.distributed.DistributedSampler(
            training_data, num_replicas=world_size, rank=rank)
        validation_sampler = torch.utils.data.distributed.DistributedSampler(
            test_data, num_replicas=world_size, rank=rank)

    training_dataset = torch.utils.data.DataLoader(
        training_data,
        batch_size=args.batch_size,
        shuffle=(training_sampler is None),
        sampler=training_sampler,
        num_workers=3)

    # Using the test set as a validation set, just for demonstration purposes
    validation_dataset = torch.utils.data.DataLoader(
        test_data,
        batch_size=args.batch_size,
        shuffle=(validation_sampler is None),
        sampler=validation_sampler,
        num_workers=3)
    # ##########################################

    # ############ BUILD THE MODEL #############
    classifier = build_model(args.hidden_size)

    train_model = TrainModel(classifier, device)

    # Move the model to the assigned XLA device
    classifier.to(device)
    # ############################################

    # ############ SETUP OPTIMIZER #############
    optimizer = torch.optim.Adam(classifier.parameters(),
                                 lr=args.learning_rate)
    # ##########################################

    # ############# SETUP TRAINING ##############
    trainer = mlp.trainers.DefaultTrainer(optimizers=optimizer,
                                          model_components=classifier)

    model_hyper_parameters = {"hidden_size": args.hidden_size}

    callbacks = create_callbacks_for(trainer, args.experiment_name,
                                     model_hyper_parameters, is_primary,
                                     validation_dataset,
                                     args.progress_log_period)

    manager = mlp.trainers.TrainingManager(trainer,
                                           training_dataset,
                                           num_epochs=args.num_epochs,
                                           callbacks=callbacks,
                                           experiment_data={"args": args})

    trainer.set_training_model(train_model)
    # ##########################################

    # ################# START! #################
    manager.start_training()
    # ##########################################

    logger.info("DONE.")