Example #1
def main():
    lstm_size = 1024
    lstm_layers = 3

    with monit.section("Loading data"):
        files = parser.load.load_files()
        train_files, valid_files = parser.load.split_train_valid(
            files, is_shuffle=False)

    with monit.section("Create model"):
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        model.to(device)

    experiment.add_pytorch_models({'base': model})

    experiment.load("94ab8470e6a711ea9703c1dbf199539e", 5654)

    # For debugging with a specific piece of source code
    # predictor = Predictor(model, lstm_layers, lstm_size)
    # for s in ['""" """\n', "from __future__"]:
    #     predictor.add(s)
    # s = predictor.get_suggestion()

    # Evaluate all the files in the validation set
    for file in valid_files:
        logger.log(str(file.path), Text.heading)
        evaluator = Evaluator(model,
                              file,
                              lstm_layers,
                              lstm_size,
                              skip_spaces=True)
        evaluator.eval()
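
The snippets on this page rely on module-level setup that the examples omit. A minimal sketch of what Example #1 assumes is below: the labml imports (experiment, logger, monit, Text) are the library's real entry points, but the project-local modules (parser.load, tokenizer, the model and evaluator classes) and the device variable are assumptions inferred from the names used.

import torch
from labml import experiment, logger, monit
from labml.logger import Text

import parser.load                    # assumed project-local data loading module
import tokenizer                      # assumed project-local tokenizer (defines VOCAB_SIZE)
from model import SimpleLstmModel     # assumed location of the LSTM model class
from evaluate import Evaluator        # assumed location of the evaluator

# Pick GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")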
Example #2
def main():
    lstm_size = 1024
    lstm_layers = 3

    with logger.section("Loading data"):
        files = parser.load.load_files()
        train_files, valid_files = parser.load.split_train_valid(files, is_shuffle=False)

    with logger.section("Create model"):
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        model.to(device)

    EXPERIMENT.add_models({'base': model})

    EXPERIMENT.start_replay()

    # For debugging with a specific piece of source code
    # predictor = Predictor(model, lstm_layers, lstm_size)
    # for s in ['""" """\n', "from __future__"]:
    #     predictor.add(s)
    # s = predictor.get_suggestion()

    # Evaluate all the files in the validation set
    for file in valid_files:
        logger.log(str(file.path), color=colors.BrightColor.orange)
        evaluator = Evaluator(model, file,
                              lstm_layers, lstm_size,
                              skip_spaces=True)
        keys_saved = evaluator.eval()

        logger.info(keys_saved=keys_saved)
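
Example #2 (and the two training examples below) target an older iteration of the lab library, where a module-level Experiment object owns checkpointing and EXPERIMENT.start_replay() loads saved weights for evaluation instead of training. A sketch of the setup these snippets appear to assume; every import path and constructor argument here is an assumption based on the names used, not a confirmed API:

import torch
from lab.experiment.pytorch import Experiment  # assumed older vpj/lab import path
from lab import colors, logger                  # assumed import locations

# Hypothetical experiment name; the real one lives in the project source
EXPERIMENT = Experiment(name="simple_lstm", python_file=__file__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")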
Example #3
def main_train():
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32
    is_half = False

    with logger.section("Create model"):
        # Create model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)

        # Use half precision
        if is_half:
            model.half()

        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        if is_half:
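            # Adam's default eps (1e-8) underflows to zero in float16, so use a larger value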
            optimizer = torch.optim.Adam(model.parameters(), eps=1e-5)
        else:
            optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0
    if is_half:
        dtype = torch.float16
    else:
        dtype = torch.float32
    h0 = torch.zeros((lstm_layers, batch_size, lstm_size),
                     device=device,
                     dtype=dtype)
    c0 = torch.zeros((lstm_layers, batch_size, lstm_size),
                     device=device,
                     dtype=dtype)
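    # (num_layers, batch, hidden_size) matches the h_0/c_0 shape expected by torch.nn.LSTM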

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    EXPERIMENT.add_models({'base': model})

    # Start training from scratch
    EXPERIMENT.start_train(True)

    # Setup logger indicators
    logger.add_indicator("train_loss", queue_limit=500, is_histogram=True)
    logger.add_indicator("valid_loss", queue_limit=500, is_histogram=True)

    for epoch in range(100):
        if not run_epoch(epoch, model, loss_func, optimizer, seq_len,
                         batch_size, h0, c0):
            break
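
The eps tweak above matters because float16 cannot represent Adam's default eps of 1e-8: the smallest positive float16 value is roughly 6e-8, so 1e-8 rounds to zero and no longer guards the division in Adam's update step. A quick check in plain PyTorch (no project code involved):

import torch

print(torch.tensor(1e-8, dtype=torch.float16))  # tensor(0., dtype=torch.float16)
print(torch.tensor(1e-5, dtype=torch.float16))  # tensor(1.0014e-05, dtype=torch.float16)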
Example #4
def main_train():
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with logger.section("Loading data"):
        # Load all python files
        files = parser.load.load_files()
        # Split training and validation data
        train_files, valid_files = parser.load.split_train_valid(files, is_shuffle=False)

    with logger.section("Create model"):
        # Create model
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move model to `device`
        model.to(device)

        # Create loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # Initial state is 0
    h0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)
    c0 = torch.zeros((lstm_layers, batch_size, lstm_size), device=device)

    # Setup logger indicators
    logger.add_indicator("train_loss", queue_limit=500, is_histogram=True)
    logger.add_indicator("valid_loss", queue_limit=500, is_histogram=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    EXPERIMENT.add_models({'base': model})

    # Start training from scratch (step 0)
    EXPERIMENT.start_train(0)

    # Number of batches per epoch
    batches = math.ceil(sum([len(f[1]) + 1 for f in train_files]) / (batch_size * seq_len))
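    # e.g. with hypothetical token counts [10_000, 25_000, 4_000] this gives
    # ceil(39_003 / (32 * 32)) = 39; each f is assumed to be a (path, token_list)
    # pair, and the +1 accounts for an end-of-file token (cf. eof=0 below)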

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Train for 100 epochs
    for epoch in logger.loop(range(100)):
        # Create trainer
        trainer = Trainer(files=train_files,
                          model=model,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          batch_size=batch_size,
                          seq_len=seq_len,
                          is_train=True,
                          h0=h0,
                          c0=c0,
                          eof=0)
        # Create validator (uses the held-out valid_files, not train_files)
        validator = Trainer(files=valid_files,
                            model=model,
                            loss_func=loss_func,
                            optimizer=optimizer,
                            is_train=False,
                            seq_len=seq_len,
                            batch_size=batch_size,
                            h0=h0,
                            c0=c0,
                            eof=0)

        # Next batch indices for training and validation
        train_batch = 0
        valid_batch = 0

        # Loop through steps
        for i in range(1, steps_per_epoch):
            # Set global step
            global_step = epoch * batches + min(batches, (batches * i) // steps_per_epoch)
            logger.set_global_step(global_step)

            # Last batch to train and validate
            train_batch_limit = trainer.x.shape[0] * min(1., (i + 1) / steps_per_epoch)
            valid_batch_limit = validator.x.shape[0] * min(1., (i + 1) / steps_per_epoch)

            try:
                with logger.delayed_keyboard_interrupt():

                    with logger.section("train", total_steps=trainer.x.shape[0], is_partial=True):
                        model.train()
                        # Train
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            logger.progress(train_batch + 1)
                            train_batch += 1

                    with logger.section("valid", total_steps=validator.x.shape[0], is_partial=True):
                        model.eval()
                        # Validate
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            logger.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    logger.write()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.new_line()

            except KeyboardInterrupt:
                logger.save_progress()
                logger.save_checkpoint()
                logger.new_line()
                return
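
The logger.delayed_keyboard_interrupt() context manager above is what makes the KeyboardInterrupt handler safe: Ctrl-C is presumably deferred until the current train/validate step completes, so save_checkpoint() never runs mid-update. A minimal sketch of that pattern (not the library's implementation) looks like this:

import signal

class DelayedKeyboardInterrupt:
    """Defer SIGINT until the protected block finishes."""

    def __enter__(self):
        self.interrupted = False
        # Replace the SIGINT handler for the duration of the block
        self.old_handler = signal.signal(signal.SIGINT, self._handler)
        return self

    def _handler(self, signum, frame):
        # Remember that Ctrl-C was pressed, but don't raise yet
        self.interrupted = True

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore the original handler, then deliver the deferred interrupt
        signal.signal(signal.SIGINT, self.old_handler)
        if self.interrupted:
            raise KeyboardInterrupt()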