def main(argv=None):
    """Score saved agreement models over a range of test depths.

    For every model type, maximum trained depth (1..11) and model index
    (0..9) a model is built, its weights are loaded from
    ``models_from_colab/agreement_models/`` and it is evaluated on
    ``n_trials`` sequences from ``SubjectVerbAgreement.get_seq`` for each
    test depth in 1..20. One record per (model, test depth) holding the
    accuracy averaged over the trials is collected; the list of records is
    printed and pickled to ``data_points_pr_acc_r.txt``.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    data_points = []
    n_of_each_model = 10
    n_trials = 8
    for model_type in ['transformer', 'lstm', 'rnn']:
        for max_trained_depth in range(1, 12):
            for ii in range(n_of_each_model):
                print(f'dep{max_trained_depth}_ii_{ii}')
                # The sizes are hard-coded rather than read from args; they
                # have to match the checkpoints loaded below.
                if model_type == "transformer":
                    d_model = 16
                    model = SequencePredictorRecurrentTransformer(
                        d_model=d_model, n_classes=5,
                        sequence_length=args.sequence_length,
                        attention_type=args.attention_type,
                        n_layers=args.n_layers,
                        n_heads=args.n_heads,
                        d_query=d_model,  # used to be d_query
                        dropout=args.dropout,
                        softmax_temp=None,
                        attention_dropout=args.attention_dropout,
                    )
                else:
                    d_model = 8
                    model = SequencePredictorRNN(
                        d_model=d_model, n_classes=5,
                        n_layers=args.n_layers,
                        dropout=args.dropout,
                        rnn_type=model_type
                    )
                print(f"Created model:\n{model}")
                model.to(device)
                checkpoint = torch.load(
                    f"models_from_colab/agreement_models/model_{model_type}_depth_{max_trained_depth}_num_{ii}.zip",
                    map_location=device,
                )
                model.load_state_dict(checkpoint['model_state'])

                # Switch to inference mode once per model instead of before
                # every forward pass, and skip autograd bookkeeping since
                # nothing is back-propagated here.
                model.eval()
                with torch.no_grad():
                    for test_depth in range(1, 21):  # was 1, 32
                        n_correct = 0
                        for _ in range(n_trials):
                            # NOTE(review): the sequence is fed to the model
                            # without an explicit move to `device` - confirm
                            # get_seq already returns tensors placed there.
                            x, y, m = SubjectVerbAgreement.get_seq(test_depth)
                            yhat = model(x.unsqueeze(1))
                            _, acc = loss_fn(
                                y.unsqueeze(1), yhat, m.unsqueeze(1))
                            n_correct += acc
                        data_points.append({
                            'model_type': model_type,
                            'max_trained_depth': max_trained_depth,
                            'test_depth': test_depth,
                            'accuracy': n_correct / n_trials,
                        })

    print("data points")
    print(data_points)
    # The file holds a binary pickle despite its ".txt" name.
    with open("data_points_pr_acc_r.txt", "wb") as fp:
        pickle.dump(data_points, fp)
    """
def main(argv=None):
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)
    if args.model_type == "transformer":
        model = SequencePredictorRecurrentTransformer(
                    d_model=args.d_model, n_classes=5,
                    sequence_length=args.sequence_length,
                    attention_type=args.attention_type,
                    n_layers=args.n_layers,
                    n_heads=args.n_heads,
                    d_query=args.d_model, # used to be d_query
                    dropout=args.dropout,
                    softmax_temp=None,
                    attention_dropout=args.attention_dropout,
                )
    else:
        model = SequencePredictorRNN(
                    d_model=args.d_model, n_classes=5,
                    n_layers=args.n_layers,
                    dropout=args.dropout,
                    rnn_type=args.model_type
                )
    print(f"Created model:\n{model}")
    model.to(device)
    print("Number of epochs model was trained on: ",torch.load(args.continue_from, map_location=device)['epoch'])
    model.load_state_dict(torch.load(args.continue_from, map_location=device)['model_state'])

    def format_preds(x, y, preds, mask):
        n = len(x)
        n_dig = math.floor(math.log10(n)) + 1
        nums = []
        for p_dig in range(n_dig):
            nums.append( "# |" + "".join([str((i//10**p_dig)%10) for i in range(n)]) + "\n")
        nums = "".join(nums[::-1])
        xs = "x |" + "".join([str(int(v)) for v in x]) + "\n"
        ys = "y |" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in y])]) + "\n"
        yh = "yh|" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in preds])]) + "\n"
        return nums + xs + ys + yh

    acc_list = []
    max_acc = None
    for stack_size in range(1, 64):
        x, y, m = SubjectVerbAgreement.get_seq(stack_size)
        # print(x.shape, y.shape, m.shape)
        model.eval()
        yhat = model(x.unsqueeze(1))
        hdn = model.hidden_state # batch x seq x hdn
        loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1))
        acc_list.append((stack_size, acc))
        if acc == 1:
            max_acc = stack_size
    print("Highest perfect score at depth:", max_acc)
    plot_hidden_state_2d(np.array(acc_list), pca=False)

    stack_size = 7 # Change this value to test longer / shorter sequences
    x, y, m = SubjectVerbAgreement.get_seq(stack_size)
    model.eval()
    yhat = model(x.unsqueeze(1))
    hdn = model.hidden_state # batch x seq x hdn
    loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1))
    print("Model loss: ", loss)
    print("Model accuracy: ", acc)
    print(format_preds(x, y, torch.argmax(yhat, dim=2)[0], m))
    plot_hidden_state_2d(hdn[0].detach().cpu().numpy(), pca=True)

    """
Exemple #3
0
def main(argv=None):
    """Train and save agreement models for every model type and depth.

    For each model type ('rnn', 'lstm', 'transformer') and each maximum
    depth in 1..11 the function keeps training freshly built models until
    ten checkpoints (indices 0..9) exist. An index whose checkpoint file is
    already on disk is skipped. A training attempt is saved, and the index
    advanced, as soon as the evaluated accuracy reaches 0.95; an attempt that
    gets to epoch 100 without having been saved is abandoned and the same
    index is retried with a new model.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    # Choose a device and move everything there
    print("Running on {}".format(device))

    parser = argparse.ArgumentParser(
        description="Train a transformer for an agreement task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    n_of_each_model = 10
    epoch_cap = 100
    target_accuracy = 0.95

    # Make the dataset and the model
    for model_type in ['rnn', 'lstm', 'transformer']:
        for max_depth in range(1, 12):
            ii = 0
            # NOTE(review): `ii` only advances on an existing file or a
            # successful save, so a configuration that never reaches the
            # target accuracy is retried without bound.
            while ii < n_of_each_model:
                print(max_depth, ii)
                # Single source of truth for the checkpoint location, used by
                # both the existence check and the save below.
                model_path = (
                    "/content/drive/My Drive/final_project_material/"
                    f"agreement_models/model_{model_type}"
                    f"_depth_{max_depth}_num_{ii}"
                )
                # skip existing models
                if os.path.isfile(model_path):
                    ii += 1
                    continue

                train_set = SubjectVerbAgreement(
                    2*max_depth+1, max_depth=max_depth)
                test_set = SubjectVerbAgreement(
                    2*max_depth+1, max_depth=max_depth)
                # NOTE(review): `batch_size` is a free name here, not
                # `args.batch_size` - confirm it is defined at module level.
                train_loader = DataLoader(
                    train_set,
                    batch_size=batch_size,
                    pin_memory=device == "cuda"
                )
                test_loader = DataLoader(
                    test_set,
                    batch_size=batch_size,
                    pin_memory=device == "cuda"
                )

                if model_type == "transformer":
                    d_model = 16
                    model = SequencePredictorRecurrentTransformer(
                        d_model=d_model, n_classes=5,
                        sequence_length=args.sequence_length,
                        attention_type=args.attention_type,
                        n_layers=args.n_layers,
                        n_heads=args.n_heads,
                        d_query=d_model,  # used to be d_query
                        dropout=args.dropout,
                        softmax_temp=None,
                        attention_dropout=args.attention_dropout,
                    )
                else:
                    d_model = 8
                    model = SequencePredictorRNN(
                        d_model=d_model, n_classes=5,
                        n_layers=args.n_layers,
                        dropout=args.dropout,
                        rnn_type=model_type
                    )
                print(f"Created model:\n{model}")
                model.to(device)

                # Start training
                optimizer = get_optimizer(model.parameters(), args)
                start_epoch = 1
                if args.continue_from:
                    start_epoch = load_model(
                        args.continue_from,
                        model,
                        optimizer,
                        device
                    )
                lr_schedule = torch.optim.lr_scheduler.LambdaLR(
                    optimizer,
                    lambda e: 1. if e < args.reduce_lr_at else 0.1
                )
                for e in range(start_epoch, args.epochs+1):
                    print('Epoch:', e)
                    print('Training...')
                    train(model, optimizer, train_loader, device)
                    print('Evaluating...')
                    acc = evaluate(
                        model, test_loader, device, return_accuracy=True)
                    lr_schedule.step()
                    # NOTE(review): the epoch cap is tested before the
                    # accuracy, so a model that first reaches the target
                    # exactly at the cap is discarded - confirm intent.
                    if e == epoch_cap:
                        break
                    if acc >= target_accuracy:
                        save_model(model_path, model, optimizer, e)
                        ii += 1
                        break
Exemple #4
0
def main(argv=None):
    """Command-line entry point that trains an ``ImageGenerator``.

    Registers the shared transformer, optimizer and dataset options plus the
    script-specific ones, builds the dataset and model, optionally resumes
    from ``--continue_from`` and runs ``train`` with saving, evaluation and
    yielding callbacks. The process exits with status 1 when ``train``
    reports that it yielded.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    parser = argparse.ArgumentParser(
        description="Train a transformer to generate images")

    add_transformer_arguments(parser)
    add_optimizer_arguments(parser)
    add_dataset_arguments(parser)

    # Script-specific options, registered in exactly this order.
    script_options = (
        ("--mixtures", dict(
            type=int, default=10,
            help="How many logistics to use to model the output")),
        ("--iterations", dict(
            type=int, default=100,
            help="How many iterations to train for")),
        ("--batch_size", dict(
            type=int, default=4,
            help="How many samples to use together")),
        ("--save_to", dict(
            default=None,
            help="Set a file to save the models to.")),
        ("--continue_from", dict(
            default=None,
            help="Load the model from a file")),
        ("--save_frequency", dict(
            default=3000, type=int,
            help="Save every that many steps")),
        ("--evaluate_frequency", dict(
            default=3000, type=int,
            help="Evaluate on the test set after that many iterations")),
        ("--yield_frequency", dict(
            default=10**9, type=int,
            help="Stop after that many iterations so that other jobs can run")),
    )
    for flag, settings in script_options:
        parser.add_argument(flag, **settings)

    args = parser.parse_args(argv)
    print_transformer_arguments(args)

    # Dataset first: the model needs the training set's sequence length.
    train_set, test_set = get_dataset(args)

    model_width = args.d_query * args.n_heads
    model = ImageGenerator(
        model_width,
        train_set.sequence_length,
        args.mixtures,
        attention_type=args.attention_type,
        n_layers=args.n_layers,
        n_heads=args.n_heads,
        d_query=args.d_query,
        dropout=args.dropout,
        softmax_temp=None,
        attention_dropout=args.attention_dropout,
        bits=args.bits,
        rounds=args.rounds,
        chunk_size=args.chunk_size,
        masked=True,
    )

    # Pick the device and move the model onto it.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print("Running on {}".format(device))
    model.to(device)

    # Both loaders share batch size and pinning behaviour.
    loader_settings = dict(batch_size=args.batch_size,
                           pin_memory=device == "cuda")
    train_loader = DataLoader(train_set, **loader_settings)
    test_loader = DataLoader(test_set, **loader_settings)

    optimizer = get_optimizer(model.parameters(), args)
    # Resume the iteration counter from the checkpoint when one is given.
    iteration = (load_model(args.continue_from, model, optimizer, device)
                 if args.continue_from else 0)
    optimizer.set_lr(args.lr)

    callbacks = callback_chain(
        saver(args.save_frequency, args.save_to, model, optimizer),
        evaluator(args.evaluate_frequency, model, test_loader, device),
        stopper(args.yield_frequency),
    )
    yielded = train(model, optimizer, train_loader, iteration,
                    args.iterations, callbacks, device)

    # Non-zero exit code to notify the process watcher that we yielded
    if yielded:
        sys.exit(1)
def main(argv=None):
    """Command-line entry point that trains a ``SequencePredictor`` on ``CopyTask``.

    Registers the shared optimizer and transformer options plus the
    script-specific ones, builds the train/test datasets and the model,
    optionally resumes from ``--continue_from`` and then runs one
    train/evaluate cycle per epoch, saving to ``--save_to`` every
    ``--save_frequency`` epochs when a target file is set.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task")

    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)

    # Script-specific options, registered in exactly this order.
    script_options = (
        ("--sequence_length", dict(
            type=int, default=128,
            help="Set the maximum sequence length")),
        ("--n_classes", dict(
            type=int, default=10,
            help="Set the number of classes")),
        ("--epochs", dict(
            type=int, default=100,
            help="How many epochs to train for")),
        ("--batch_size", dict(
            type=int, default=64,
            help="How many samples to use together")),
        ("--reduce_lr_at", dict(
            type=int, default=30,
            help="At this epoch divide the lr by 10")),
        ("--save_to", dict(
            default=None,
            help="Set a file to save the models to.")),
        ("--continue_from", dict(
            default=None,
            help="Load the model from a file")),
        ("--save_frequency", dict(
            default=1, type=int,
            help="Save every that many epochs")),
    )
    for flag, settings in script_options:
        parser.add_argument(flag, **settings)

    args = parser.parse_args(argv)
    print_transformer_arguments(args)

    # Datasets and model.
    train_set = CopyTask(args.sequence_length, args.n_classes)
    test_set = CopyTask(args.sequence_length, args.n_classes)
    model_width = args.d_query * args.n_heads
    model = SequencePredictor(
        model_width,
        args.sequence_length,
        args.n_classes,
        attention_type=args.attention_type,
        n_layers=args.n_layers,
        n_heads=args.n_heads,
        d_query=args.d_query,
        dropout=args.dropout,
        softmax_temp=None,
        attention_dropout=args.attention_dropout,
        bits=args.bits,
        rounds=args.rounds,
        chunk_size=args.chunk_size,
        masked=args.masked,
    )

    # Pick the device and move the model onto it.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print("Running on {}".format(device))
    model.to(device)

    # Both loaders share batch size and pinning behaviour.
    loader_settings = dict(batch_size=args.batch_size,
                           pin_memory=device == "cuda")
    train_loader = DataLoader(train_set, **loader_settings)
    test_loader = DataLoader(test_set, **loader_settings)

    optimizer = get_optimizer(model.parameters(), args)
    # Resume the epoch counter from the checkpoint when one is given.
    start_epoch = (load_model(args.continue_from, model, optimizer, device)
                   if args.continue_from else 1)

    # Multiplier 1.0 until the scheduler's step count reaches
    # --reduce_lr_at, 0.1 from then on.
    lr_schedule = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: 1. if step < args.reduce_lr_at else 0.1,
    )

    for epoch in range(start_epoch, args.epochs + 1):
        train(model, optimizer, train_loader, device)
        evaluate(model, test_loader, device)
        checkpoint_due = (epoch % args.save_frequency) == 0
        if checkpoint_due and args.save_to:
            save_model(args.save_to, model, optimizer, epoch)
        lr_schedule.step()
Exemple #6
0
def main(argv=None):
    """Score saved agreement models on one sequence per test depth.

    For every model type and maximum trained depth (1..11) the ten saved
    models are loaded from ``models_from_colab/agreement_models/`` and each
    is evaluated on a single sequence from ``SubjectVerbAgreement.get_seq``
    for every test depth in 1..20. One record per (test depth, model) is
    collected; the list is printed and pickled to ``data_points_sva.txt``.

    Each checkpoint is read from disk once per (model type, trained depth)
    rather than once per test depth; the order of the collected records is
    unchanged.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task")
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    n_of_each_model = 10
    data_points = []
    for model_type in ['rnn', 'lstm', 'transformer']:
        for max_trained_depth in range(1, 12):
            # Build and load every model for this configuration up front so
            # the test-depth loop below does no disk I/O.
            # NOTE(review): reusing one model object across test depths
            # assumes a forward pass does not depend on state left behind by
            # an earlier one - confirm for both model classes.
            models = []
            for ii in range(n_of_each_model):
                if model_type == "transformer":
                    model = SequencePredictorRecurrentTransformer(
                        d_model=16,
                        n_classes=5,
                        sequence_length=args.sequence_length,
                        attention_type=args.attention_type,
                        n_layers=args.n_layers,
                        n_heads=args.n_heads,
                        d_query=8,  # used to be d_query
                        dropout=args.dropout,
                        softmax_temp=None,
                        attention_dropout=args.attention_dropout,
                    )
                else:
                    model = SequencePredictorRNN(
                        d_model=8,
                        n_classes=5,
                        n_layers=args.n_layers,
                        dropout=args.dropout,
                        rnn_type=model_type)
                print(f"Created model:\n{model}")
                model.to(device)
                model_name = (
                    "models_from_colab/agreement_models/"
                    f"model_{model_type}_depth_{max_trained_depth}"
                    f"_num_{ii}.zip"
                )
                checkpoint = torch.load(model_name, map_location=device)
                model.load_state_dict(checkpoint['model_state'])
                model.eval()
                models.append(model)

            # Pure inference: no gradients are needed.
            with torch.no_grad():
                for test_depth in range(1, 21):
                    for model in models:
                        x, y, m = SubjectVerbAgreement.get_seq(test_depth)
                        yhat = model(x.unsqueeze(1))
                        _, acc = loss_fn(
                            y.unsqueeze(1), yhat, m.unsqueeze(1))
                        data_points.append({
                            'model_type': model_type,
                            'max_trained_depth': max_trained_depth,
                            'test_depth': test_depth,
                            'accuracy': acc
                        })
    print("data points:")
    print(data_points)

    # The file holds a binary pickle despite its ".txt" name.
    with open("data_points_sva.txt", "wb") as fp:
        pickle.dump(data_points, fp)
    """
Exemple #7
0
def main(argv=None):
    """Train a sequence model on ``CountTaskWithEOS`` and optionally plot its hidden state.

    Builds train/test datasets with a maximum depth of 12, constructs either
    the recurrent transformer or the RNN predictor according to
    ``args.model_type``, optionally resumes from ``args.continue_from`` and
    runs one train/evaluate cycle per epoch, saving to ``args.save_to``
    every ``args.save_frequency`` epochs when a target file is set. When
    ``args.plot_hidden`` is set, one test sample is passed through the
    trained model and the hidden state of its first ten positions is plotted.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process command line.
    """
    # Choose a device and move everything there
    print("Running on {}".format(device))

    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task")
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)
    # Make the dataset and the model
    max_depth = 12
    train_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    test_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              pin_memory=device == "cuda")
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             pin_memory=device == "cuda")

    if args.model_type == "transformer":
        model = SequencePredictorRecurrentTransformer(
            d_model=args.d_model,
            n_classes=args.n_classes,
            sequence_length=args.sequence_length,
            attention_type=args.attention_type,
            n_layers=args.n_layers,
            n_heads=args.n_heads,
            d_query=args.d_model,  # used to be d_query
            dropout=args.dropout,
            softmax_temp=None,
            attention_dropout=args.attention_dropout,
        )
    else:
        model = SequencePredictorRNN(d_model=args.d_model,
                                     n_classes=args.n_classes,
                                     n_layers=args.n_layers,
                                     dropout=args.dropout,
                                     rnn_type=args.model_type)
    print(f"Created model:\n{model}")
    model.to(device)
    # Start training
    optimizer = get_optimizer(model.parameters(), args)
    start_epoch = 1
    if args.continue_from:
        start_epoch = load_model(args.continue_from, model, optimizer, device)
    lr_schedule = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda e: 1. if e < args.reduce_lr_at else 0.1)
    for e in range(start_epoch, args.epochs + 1):
        train(model, optimizer, train_loader, device)
        print('Epoch:', e)
        evaluate(model, test_loader, device)
        if (e % args.save_frequency) == 0 and args.save_to:
            save_model(args.save_to, model, optimizer, e)
        lr_schedule.step()
    if args.plot_hidden:
        # Only the input is needed for plotting; target and mask are ignored.
        x, _, _ = next(test_set)  # x is 1d of length sequence_len
        model.eval()
        # `.to()` returns a new tensor, so the input is moved at the call
        # site and `x` itself is left as returned for the print below.
        with torch.no_grad():
            # The forward pass is run for the hidden state it leaves on the
            # model; its output is not used.
            model(x.unsqueeze(1).to(device))
        hdn = model.hidden_state  # batch x seq x hdn
        max_len = 10
        print("Plotting on: ", x[:max_len])
        plot_hidden_state_2d(hdn[0, :max_len, :].detach().cpu().numpy(),
                             pca=True)
Exemple #8
0
def main(argv):
    """Command-line entry point that generates images with a (pre)trained model.

    Parses the options, builds either a ``RecurrentGenerator`` or an
    ``ImageGenerator``, gathers the requested samples from the training or
    test set, optionally loads weights from the ``model`` path (``-`` keeps
    the random initialization), runs the prediction while timing it, and
    finally plots and/or saves the predicted and real images.

    Args:
        argv: Argument list handed to ``parse_args``.
    """
    parser = argparse.ArgumentParser(
        description="Generate an image from a pretrained model")

    add_dataset_arguments(parser)
    add_transformer_arguments(parser)

    # Script-specific arguments, registered in exactly this order.
    script_arguments = (
        ("model", dict(
            help="The path to the model (give '-' for random intialization)")),
        ("--mixtures", dict(
            type=int, default=10,
            help="How many logistics to use to model the output")),
        ("--plot", dict(
            action="store_true",
            help="Plot the generated image")),
        ("--save_image", dict(
            help="Path to save an image to")),
        ("--image_shape", dict(
            type=lambda text: tuple(int(part) for part in text.split(",")),
            default=(28, 28),
            help="Reshape the prediction to plot it")),
        ("--index", dict(
            type=index_type, default=[0],
            help="Choose the index from the dataset")),
        ("--offset", dict(
            type=int, default=300,
            help="Choose the offset in the image")),
        ("--training_set", dict(
            action="store_true",
            help="Predict from the training set")),
        ("--load_pytorch", dict(
            action="store_true",
            help="Load old pytorch model")),
        ("--force_cpu", dict(
            action="store_true",
            help="Set the device to cpu")),
        ("--recurrent", dict(
            action="store_true",
            help="Use a recurrent model for inference")),
    )
    for name, settings in script_arguments:
        parser.add_argument(name, **settings)

    args = parser.parse_args(argv)
    print_transformer_arguments(args)

    # Choose a device to run on
    gpu_allowed = torch.cuda.is_available() and not args.force_cpu
    device = "cuda" if gpu_allowed else "cpu"

    # Both generator classes take the same constructor arguments here, so
    # only the class differs between the recurrent and the regular path.
    train_set, test_set = get_dataset(args)
    generator_cls = RecurrentGenerator if args.recurrent else ImageGenerator
    model = generator_cls(
        args.d_query * args.n_heads,
        train_set.sequence_length,
        args.mixtures,
        attention_type=args.attention_type,
        n_layers=args.n_layers,
        n_heads=args.n_heads,
        d_query=args.d_query,
        dropout=args.dropout,
        softmax_temp=None,
        attention_dropout=args.attention_dropout,
        bits=args.bits,
        rounds=args.rounds,
        chunk_size=args.chunk_size,
        masked=True,
    )

    # Gather the images
    source_set = train_set if args.training_set else test_set
    images = collect_batch(source_set, args.index, device)

    # Load the weights unless a random initialization was requested with "-".
    if args.model != "-":
        restore = load_model_pytorch if args.load_pytorch else load_model
        restore(args.model, model, None, device)

    model.to(device)
    model.eval()

    # Run and time the prediction with the routine matching the model kind.
    run_prediction = predict_with_recurrent if args.recurrent else predict
    timer = Timer()
    pred_images = run_prediction(model, images, args.offset)
    print("Elapsed time:", timer.measure())

    shape = args.image_shape

    # Plot the first predicted/real pair.
    if args.plot:
        print(pred_images)
        pred_images = pred_images.cpu()
        images = images.cpu()
        plt.figure()
        plt.imshow(pred_images[0].reshape(*shape))
        plt.figure()
        # The real image gets one 0 appended before it is reshaped.
        plt.imshow(np.hstack([images[0], 0]).reshape(*shape))
        plt.show()

    # Write every predicted/real pair using the --save_image template.
    if args.save_image:
        pred_images = pred_images.cpu()
        images = images.cpu()
        for i, real in enumerate(images):
            imwrite(args.save_image.format("pred", i),
                    pred_images[i].reshape(*shape))
            imwrite(args.save_image.format("real", i),
                    np.hstack([real, 0]).reshape(*shape))