def main(argv=None):
    """Evaluate saved agreement models across trained and tested depths.

    Loads every saved checkpoint (per model type, trained depth, and replica
    index ``ii``), scores it on ``n_trials`` random subject-verb-agreement
    sequences for each test depth, and pickles the accumulated data points.

    Args:
        argv: optional argument list forwarded to argparse (None -> sys.argv).
    """
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    data_points = []
    n_of_each_model = 10  # replicas saved per (model_type, depth) pair
    n_trials = 8          # random sequences averaged per test depth
    for model_type in ['transformer', 'lstm', 'rnn']:
        for max_trained_depth in range(1, 12):
            for ii in range(n_of_each_model):
                print(f'dep{max_trained_depth}_ii_{ii}')
                if model_type == "transformer":
                    d_model = 16
                    model = SequencePredictorRecurrentTransformer(
                                d_model=d_model, n_classes=5,
                                sequence_length=args.sequence_length,
                                attention_type=args.attention_type,
                                n_layers=args.n_layers,
                                n_heads=args.n_heads,
                                d_query=d_model,  # used to be d_query
                                dropout=args.dropout,
                                softmax_temp=None,
                                attention_dropout=args.attention_dropout,
                            )
                else:
                    d_model = 8
                    model = SequencePredictorRNN(
                                d_model=d_model, n_classes=5,
                                n_layers=args.n_layers,
                                dropout=args.dropout,
                                rnn_type=model_type
                            )
                print(f"Created model:\n{model}")
                model.to(device)
                model.load_state_dict(torch.load(
                    f"models_from_colab/agreement_models/model_{model_type}_depth_{max_trained_depth}_num_{ii}.zip",
                    map_location=device)['model_state'])
                # Hoisted: eval mode set once per checkpoint, not per trial.
                model.eval()
                for test_depth in range(1, 21):  # was 1, 32
                    n_correct = 0
                    # Pure evaluation — skip autograd bookkeeping.
                    with torch.no_grad():
                        for _ in range(n_trials):
                            x, y, m = SubjectVerbAgreement.get_seq(test_depth)
                            yhat = model(x.unsqueeze(1))
                            _, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1))
                            n_correct += acc
                    data_points.append({'model_type': model_type,
                                        'max_trained_depth': max_trained_depth,
                                        'test_depth': test_depth,
                                        'accuracy': n_correct / n_trials})

    print("data points")
    print(data_points)
    with open("data_points_pr_acc_r.txt", "wb") as fp:
        pickle.dump(data_points, fp)
    """
def main(argv=None):
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)
    if args.model_type == "transformer":
        model = SequencePredictorRecurrentTransformer(
                    d_model=args.d_model, n_classes=5,
                    sequence_length=args.sequence_length,
                    attention_type=args.attention_type,
                    n_layers=args.n_layers,
                    n_heads=args.n_heads,
                    d_query=args.d_model, # used to be d_query
                    dropout=args.dropout,
                    softmax_temp=None,
                    attention_dropout=args.attention_dropout,
                )
    else:
        model = SequencePredictorRNN(
                    d_model=args.d_model, n_classes=5,
                    n_layers=args.n_layers,
                    dropout=args.dropout,
                    rnn_type=args.model_type
                )
    print(f"Created model:\n{model}")
    model.to(device)
    print("Number of epochs model was trained on: ",torch.load(args.continue_from, map_location=device)['epoch'])
    model.load_state_dict(torch.load(args.continue_from, map_location=device)['model_state'])

    def format_preds(x, y, preds, mask):
        n = len(x)
        n_dig = math.floor(math.log10(n)) + 1
        nums = []
        for p_dig in range(n_dig):
            nums.append( "# |" + "".join([str((i//10**p_dig)%10) for i in range(n)]) + "\n")
        nums = "".join(nums[::-1])
        xs = "x |" + "".join([str(int(v)) for v in x]) + "\n"
        ys = "y |" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in y])]) + "\n"
        yh = "yh|" + "".join([elt if mask[i] == 1 else '?' for i, elt in enumerate([str(int(v)) for v in preds])]) + "\n"
        return nums + xs + ys + yh

    acc_list = []
    max_acc = None
    for stack_size in range(1, 64):
        x, y, m = SubjectVerbAgreement.get_seq(stack_size)
        # print(x.shape, y.shape, m.shape)
        model.eval()
        yhat = model(x.unsqueeze(1))
        hdn = model.hidden_state # batch x seq x hdn
        loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1))
        acc_list.append((stack_size, acc))
        if acc == 1:
            max_acc = stack_size
    print("Highest perfect score at depth:", max_acc)
    plot_hidden_state_2d(np.array(acc_list), pca=False)

    stack_size = 7 # Change this value to test longer / shorter sequences
    x, y, m = SubjectVerbAgreement.get_seq(stack_size)
    model.eval()
    yhat = model(x.unsqueeze(1))
    hdn = model.hidden_state # batch x seq x hdn
    loss, acc = loss_fn(y.unsqueeze(1), yhat, m.unsqueeze(1))
    print("Model loss: ", loss)
    print("Model accuracy: ", acc)
    print(format_preds(x, y, torch.argmax(yhat, dim=2)[0], m))
    plot_hidden_state_2d(hdn[0].detach().cpu().numpy(), pca=True)

    """
Esempio n. 3
0
def main(argv=None):
    """Train agreement models for every (model type, max depth) combination.

    For each architecture and training depth, trains up to 10 replica models
    and saves each to Google Drive once it reaches >= 0.95 test accuracy.
    Already-saved replicas are skipped, so the sweep can resume after an
    interruption.

    Args:
        argv: optional argument list forwarded to argparse (None -> sys.argv).
    """
    # Choose a device and move everything there
    print("Running on {}".format(device))

    parser = argparse.ArgumentParser(
        description="Train a transformer for an agreement task"
    )
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)
    # Make the dataset and the model
    for model_type in ['rnn', 'lstm', 'transformer']:
        for max_depth in range(1, 12):
          # Replica index: advanced only when a model is skipped (already on
          # disk) or successfully saved; otherwise the slot is retried with a
          # freshly initialized model.
          ii = 0
          while ii < 10:
            print(max_depth, ii)
            # skip existing models
            if os.path.isfile("/content/drive/My Drive/final_project_material/agreement_models/model_" + model_type + "_depth_" + str(max_depth) + "_num_" + str(ii)):
              ii += 1
              continue
            # Fresh dataset instances per attempt; sequence length 2*max_depth+1.
            train_set = SubjectVerbAgreement(2*max_depth+1, max_depth=max_depth)
            test_set = SubjectVerbAgreement(2*max_depth+1, max_depth=max_depth)
            # NOTE(review): uses a module-level `batch_size`, not
            # args.batch_size as the other variants do — confirm intended.
            train_loader = DataLoader(
                train_set,
                batch_size=batch_size,
                pin_memory=device=="cuda"
            )
            test_loader = DataLoader(
                test_set,
                batch_size=batch_size,
                pin_memory=device=="cuda"
            )

            if model_type == "transformer":
                # Transformer gets a wider model (16) than the RNNs (8).
                d_model = 16
                model = SequencePredictorRecurrentTransformer(
                            d_model=d_model, n_classes=5,
                            sequence_length=args.sequence_length,
                            attention_type=args.attention_type,
                            n_layers=args.n_layers,
                            n_heads=args.n_heads,
                            d_query=d_model, # used to be d_query
                            dropout=args.dropout,
                            softmax_temp=None,
                            attention_dropout=args.attention_dropout,
                        )
            else:
                d_model=8
                model = SequencePredictorRNN(
                            d_model=d_model, n_classes=5,
                            n_layers=args.n_layers,
                            dropout=args.dropout,
                            rnn_type=model_type
                        )
            print(f"Created model:\n{model}")
            model.to(device)
            # Start training
            optimizer = get_optimizer(model.parameters(), args)
            start_epoch = 1
            if args.continue_from:
                start_epoch = load_model(
                    args.continue_from,
                    model,
                    optimizer,
                    device
                )
            # Drop learning rate to 10% after args.reduce_lr_at epochs.
            lr_schedule = torch.optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda e: 1. if e < args.reduce_lr_at else 0.1
            )
            for e in range(start_epoch, args.epochs+1):
                print('Epoch:', e)
                print('Training...')
                train(model, optimizer, train_loader, device)
                print('Evaluating...')
                acc = evaluate(model, test_loader, device, return_accuracy=True)
                lr_schedule.step()
                # Give up on this attempt at epoch 100; ii is NOT advanced, so
                # the slot is retried with a new model.  NOTE(review): this
                # check precedes the accuracy check, so a model that first hits
                # 0.95 exactly at epoch 100 is discarded — confirm intended.
                if e == 100:
                    break
                if acc >= 0.95:
                    save_model("/content/drive/My Drive/final_project_material/agreement_models/model_" + model_type + "_depth_" + str(max_depth) + "_num_" + str(ii),
                    model, optimizer, e)
                    ii += 1
                    break
Esempio n. 4
0
def main(argv=None):
    """Evaluate saved agreement models on sequences of varying test depth.

    For every saved checkpoint (model type x trained depth x replica ``ii``)
    the model is reloaded and scored on one subject-verb-agreement sequence
    per test depth; results are accumulated and pickled.

    Args:
        argv: optional argument list forwarded to argparse (None -> sys.argv).
    """
    print("Running on {}".format(device))
    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task")
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)

    data_points = []
    for model_type in ['rnn', 'lstm', 'transformer']:
        for max_trained_depth in range(1, 12):
            for test_depth in range(1, 21):
                for ii in range(10):
                    if model_type == "transformer":
                        model = SequencePredictorRecurrentTransformer(
                            d_model=16,
                            n_classes=5,
                            sequence_length=args.sequence_length,
                            attention_type=args.attention_type,
                            n_layers=args.n_layers,
                            n_heads=args.n_heads,
                            d_query=8,  # used to be d_query
                            dropout=args.dropout,
                            softmax_temp=None,
                            attention_dropout=args.attention_dropout,
                        )
                    else:
                        # Fixed: was `8 if model_type == 'lstm' else 8` — a
                        # redundant conditional with identical branches.
                        model = SequencePredictorRNN(
                            d_model=8,
                            n_classes=5,
                            n_layers=args.n_layers,
                            dropout=args.dropout,
                            rnn_type=model_type)
                    print(f"Created model:\n{model}")
                    model.to(device)
                    model_name = "models_from_colab/agreement_models/model_" + model_type + "_depth_" + str(
                        max_trained_depth) + "_num_" + str(ii) + ".zip"
                    model.load_state_dict(
                        torch.load(model_name,
                                   map_location=device)['model_state'])

                    model.eval()
                    # Pure evaluation — skip autograd bookkeeping.
                    with torch.no_grad():
                        x, y, m = SubjectVerbAgreement.get_seq(test_depth)
                        yhat = model(x.unsqueeze(1))
                        loss, acc = loss_fn(y.unsqueeze(1), yhat,
                                            m.unsqueeze(1))
                    data_points.append({
                        'model_type': model_type,
                        'max_trained_depth': max_trained_depth,
                        'test_depth': test_depth,
                        'accuracy': acc
                    })
    print("data points:")
    print(data_points)

    with open("data_points_sva.txt", "wb") as fp:
        pickle.dump(data_points, fp)
    """
Esempio n. 5
0
def main(argv=None):
    """Train a sequence predictor on the counting-with-EOS task.

    Builds train/test loaders, constructs a recurrent transformer or an
    RNN/LSTM depending on ``args.model_type``, trains with periodic
    checkpointing and an LR drop at ``args.reduce_lr_at``, and optionally
    plots a 2-D PCA projection of the hidden states on one test sequence.

    Args:
        argv: optional argument list forwarded to argparse (None -> sys.argv).
    """
    # Choose a device and move everything there
    print("Running on {}".format(device))

    parser = argparse.ArgumentParser(
        description="Train a transformer for a copy task")
    add_optimizer_arguments(parser)
    add_transformer_arguments(parser)
    add_auxiliary_arguments(parser)
    args = parser.parse_args(argv)
    print("args:\n-----\n", args)
    # Make the dataset and the model
    max_depth = 12
    train_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    test_set = CountTaskWithEOS(max_depth * 2 + 1, max_depth=max_depth)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              pin_memory=device == "cuda")
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             pin_memory=device == "cuda")

    if args.model_type == "transformer":
        model = SequencePredictorRecurrentTransformer(
            d_model=args.d_model,
            n_classes=args.n_classes,
            sequence_length=args.sequence_length,
            attention_type=args.attention_type,
            n_layers=args.n_layers,
            n_heads=args.n_heads,
            d_query=args.d_model,  # used to be d_query
            dropout=args.dropout,
            softmax_temp=None,
            attention_dropout=args.attention_dropout,
        )
    else:
        model = SequencePredictorRNN(d_model=args.d_model,
                                     n_classes=args.n_classes,
                                     n_layers=args.n_layers,
                                     dropout=args.dropout,
                                     rnn_type=args.model_type)
    print(f"Created model:\n{model}")
    model.to(device)
    # Start training
    optimizer = get_optimizer(model.parameters(), args)
    start_epoch = 1
    if args.continue_from:
        start_epoch = load_model(args.continue_from, model, optimizer, device)
    # Drop learning rate to 10% after args.reduce_lr_at epochs.
    lr_schedule = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda e: 1. if e < args.reduce_lr_at else 0.1)
    for e in range(start_epoch, args.epochs + 1):
        train(model, optimizer, train_loader, device)
        print('Epoch:', e)
        evaluate(model, test_loader, device)
        if (e % args.save_frequency) == 0 and args.save_to:
            save_model(args.save_to, model, optimizer, e)
        lr_schedule.step()
    if args.plot_hidden:
        x, y, m = next(test_set)  # x is 1d of length sequence_len
        # Fixed: Tensor.to() is out-of-place — the original's bare
        # x.to(device) calls discarded the result and were no-ops.
        x = x.to(device)
        y = y.to(device)
        m = m.to(device)
        model.eval()
        # Pure evaluation — skip autograd bookkeeping.
        with torch.no_grad():
            yhat = model(x.unsqueeze(1))
        hdn = model.hidden_state  # batch x seq x hdn
        max_len = 10
        print("Plotting on: ", x[:max_len])
        plot_hidden_state_2d(hdn[0, :max_len, :].detach().cpu().numpy(),
                             pca=True)