Example #1
def predict_model(experiment, output_dir):
	"""Writes the predictions of a given dataset file."""
	saved_dir = "/home2/preetmhn/clms/ling_575_nlms/models/saved_{}".format(experiment)
	model = torch.load('{}/hate_speech_model_trained.pt'.format(saved_dir))
	settings = Settings(experiment, True)
	
	# get gpu
	device = get_gpu(settings)
	
	# get data, split with the same random seed as in training
	input_ids, labels, attention_masks = prepare_data(settings)
	_, validation_inputs, _, validation_labels = train_test_split(input_ids, labels, random_state=2018, test_size=0.1)
	_, validation_dataloader = split_data(settings, input_ids, labels, attention_masks)
	
	# make predictions and write to file
	settings.write_debug("Getting model predictions")
	preds = predict(device, model, validation_dataloader)

	# load tokenizer for the decoding
	tokenizer = load_bert_tokenizer(settings, True)
	
	# write to file
	settings.write_debug("Writing model predictions")
	output_file = os.path.join(output_dir, experiment + '_pred.txt')
	with open(output_file, 'w', encoding='utf-8', newline='') as f:
		out = csv.writer(f, delimiter='\t')
		out.writerow(['input', 'true', 'pred'])
		for i in range(len(preds)):
			# decode the validation split, which is what predict() actually ran on
			tokens = tokenizer.decode(validation_inputs[i], skip_special_tokens=True)
			out.writerow([tokens, validation_labels[i], preds[i]])
	
	# write scores
	settings.write_debug("Getting test evaluation")
	record_score_information(settings, validation_labels, preds)
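A minimal driver for predict_model might look like the following; the experiment name and output directory are illustrative, not from the source:

if __name__ == '__main__':
    # Hypothetical values: any experiment with a model saved under
    # saved_{experiment} would do; output lands in <output_dir>/<experiment>_pred.txt
    predict_model('baseline', 'outputs/')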
Example #2
def pred_label(f_pred, prepare_data, options, iterator, kb_dict):
    labels = []
    for x1, x2, x1_lemma, x2_lemma, y in iterator:
        x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att, y = prepare_data(x1, x2, x1_lemma, x2_lemma, y, options, kb_dict)
        preds = f_pred(x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att)
        labels = labels + preds.tolist()

    return labels
Example #3
File: gen.py Project: smith6036/nli
def pred_label(f_pred, prepare_data, iterator):
    labels = []
    for x1, x2, y in iterator:
        x1, x1_mask, x2, x2_mask, y = prepare_data(x1, x2, y)
        preds = f_pred(x1, x1_mask, x2, x2_mask)
        labels = labels + preds.tolist()

    return labels
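Examples #2 and #3 share the same contract: prepare_data pads a raw minibatch into index matrices plus masks, and f_pred maps those arrays to one class index per example. A minimal stand-in and driver, with all concrete names here hypothetical:

import numpy as np

# Hypothetical stand-in for a compiled prediction function: takes padded
# (time, batch) token matrices with their masks, returns one class per example.
def dummy_f_pred(x1, x1_mask, x2, x2_mask):
    return np.zeros(x1.shape[1], dtype='int64')

# `batches` would yield (premise, hypothesis, label) minibatches:
# labels = pred_label(dummy_f_pred, prepare_data, batches)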
Example #4
def sanity_check_for_prepare_data():
    """Test function prepare_data and print out a sample."""
    print("Running check for function prepare_data()...")
    dataloader = prepare_data(batch_size=1)
    print("Length of dataloader: ", len(dataloader))
    sample = next(iter(dataloader))[0][0]
    print("Shape of the sample image: ", sample.shape)
    plt.figure()
    plt.imshow(np.transpose(sample.detach(), (1, 2, 0)))
    plt.show()
    print("Passed!")
Example #5
def sanity_check_for_train_hologan(use_gpu=True):
    """Test training discriminator."""
    now = datetime.now()
    subsample = None

    if use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print("Using device:", device)

    hologan = HoloGAN.Net(128, (3, 128, 128)).to(device)
    init_layers_serious(hologan)
    criterion = HoloGAN.compute_loss
    dataloader = prepare_data(batch_size=8, subsample=subsample)
    optim_G = optim.Adam(hologan.G.parameters())
    optim_D = optim.Adam(hologan.D.parameters())

    # Setup tensorboard
    logdir = "logs/fit/" + now.strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(logdir)
    it = iter(dataloader)
    images, _ = next(it)  # `it.next()` is Python 2 only
    img_grid = make_grid(images)
    writer.add_image("sample_batch", img_grid)

    for epoch in range(3):
        train_one_epoch(dataloader,
                        hologan,
                        criterion,
                        optim_G,
                        optim_D,
                        device,
                        writer,
                        epoch=epoch,
                        print_step=20)

    writer.close()
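Exercising the check is a one-liner, e.g. from a __main__ guard:

if __name__ == '__main__':
    sanity_check_for_train_hologan(use_gpu=True)  # silently falls back to CPU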
Example #6
def pred_label(f_prods, prepare_data, options, iterator, word_idict):
    labels = []
    valid_acc = 0
    n_done = 0
    for x1_, x2_, y_ in iterator:
        n_done += len(x1_)
        lengths_x1 = [len(s) for s in x1_]
        lengths_x2 = [len(s) for s in x2_]
        x1, x1_mask, char_x1, char_x1_mask, x2, x2_mask, char_x2, char_x2_mask, y = prepare_data(
            x1_, x2_, y_, word_idict)
        inps = [
            x1, x1_mask, char_x1, char_x1_mask, x2, x2_mask, char_x2,
            char_x2_mask
        ]
        prods = f_prods(*inps)
        preds = prods.argmax(axis=1)
        valid_acc += (preds == y).sum()
        labels = labels + preds.tolist()

    valid_acc = 1.0 * valid_acc / n_done
    print "total sampel", n_done
    print "Acc", valid_acc

    return labels
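The running-accuracy update in this loop is a plain argmax comparison; a minimal NumPy illustration of the same arithmetic, with made-up values:

import numpy as np

prods = np.array([[0.1, 0.9],
                  [0.8, 0.2]])     # per-class scores for two examples
y = np.array([1, 1])               # gold labels
preds = prods.argmax(axis=1)       # -> array([1, 0])
acc = (preds == y).sum() / len(y)  # -> 0.5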
Example #7
    def forward(self, x):
        """Performs the forward pass."""
        emb = self.embeddings(x)
        recurrent_output, _ = self.recurrent_module(emb)
        output = self.seq_modules(recurrent_output[-1])

        return output


if __name__ == '__main__':
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    train_dataloader, valid_dataloader, \
        test_dataloader, embeddings = prepare_data()  # freeze=False

    model = Recurrent(embeddings, nn.RNN)
    # model = Recurrent(embeddings, nn.RNN, num_layers=3)
    # model = Recurrent(embeddings, nn.RNN, num_layers=3, dropout=0.4)
    # model = Recurrent(embeddings, nn.LSTM)
    # model = Recurrent(embeddings, nn.LSTM, activation=nn.LeakyReLU)
    # model = Recurrent(embeddings, nn.GRU)
    # model = Recurrent(embeddings, nn.GRU, num_layers=1)
    # model = Recurrent(embeddings, nn.GRU, hidden_size=200)
    # model = Recurrent(embeddings, nn.GRU, bidirectional=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

    main(model,
Example #8
def sanity_check_for_train_discriminator(use_gpu=False):
    """Test training discriminator."""
    now = datetime.now()
    subsample = 80

    if use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print("Using device:", device)

    netD = Discriminator(128, (3, 128, 128),
                         spec_norm=spectral_norm,
                         norm_layer=nn.InstanceNorm2d).to(device)
    criterion = nn.BCELoss()
    real_label = 1
    fake_label = 0
    opt = optim.Adam(netD.parameters())
    dataloader = prepare_data(batch_size=8, subsample=subsample)

    # Setup tensorboard
    logdir = "logs/fit/" + now.strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(logdir)
    it = iter(dataloader)
    images, _ = next(it)  # `it.next()` is Python 2 only
    img_grid = make_grid(images)
    writer.add_image("sample_batch", img_grid)
    writer.add_graph(netD, images.to(device))

    running_loss = 0.0

    for epoch in range(10):
        print("\nEpoch: ", epoch + 1)

        for i_batch, data in enumerate(Bar(dataloader)):
            netD.zero_grad()
            real_img = data[0].to(device)
            bs = real_img.shape[0]
            d_gan, d_id, d_style = netD(real_img)
            label = torch.full(d_gan.shape,
                               real_label,
                               dtype=torch.float,
                               device=device)
            errD_real = criterion(d_gan, label)
            label = torch.full(d_style.shape,
                               real_label,
                               dtype=torch.float,
                               device=device)
            errD_real += criterion(d_style, label)
            errD_real.backward()
            opt.step()

            running_loss += errD_real.item()
            if i_batch % 10 == 9:
                # global step = batches per epoch * epoch + batch index
                writer.add_scalar("training loss", running_loss / 10,
                                  epoch * len(dataloader) + i_batch)
                writer.add_figure(
                    "predictions",
                    functional.plot_classes_preds(netD, real_img))
                running_loss = 0.0

    writer.close()
Example #9
def test_prepare_data():
    """Dummy test function"""
    assert prepare_data("location") is False
    assert prepare_data("s3://mybucket/data/") is True
Example #10
def test_count_chains():
    adapters = prepare_data([16, 10, 15, 5, 1, 11, 7, 19, 6, 12, 4])
    assert count_possible_chains(adapters, 0) == 8
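This reads like Advent of Code 2020 day 10: count the distinct adapter chains starting at joltage 0, where each step may rise by at most 3. A memoized sketch that reproduces the expected count of 8; both function bodies are guesses, only the names come from the test:

from functools import lru_cache

def prepare_data(raw):
    # Hypothetical: sort ascending so each adapter only connects forward.
    return tuple(sorted(raw))

def count_possible_chains(adapters, start):
    @lru_cache(maxsize=None)
    def count(joltage):
        # A chain is complete once it reaches the highest adapter.
        if joltage == adapters[-1]:
            return 1
        return sum(count(a) for a in adapters if joltage < a <= joltage + 3)
    return count(start)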
Example #11
import sys

import numpy as np

from plotter import plot_two_class_classification, plot_regression_data, plot_multi_class_classification

if __name__ == '__main__':
    path_to_test_data = str(sys.argv[1])
    nol = int(sys.argv[2])  # number of lines of test data
    type_of_problem = sys.argv[3]
    radius_scale = float(sys.argv[4])

    W = np.load("outputs/weights" + type_of_problem + ".npy")
    V = np.load("outputs/V" + type_of_problem + ".npy")
    gama = np.load("outputs/gama" + type_of_problem + ".npy")

    type_of_problem = int(type_of_problem)

    X, y, scales = prepare_data(number_of_lines=nol, path=path_to_test_data)
    l, n = X.shape
    c = 0
    if type_of_problem == 2:
        y = prepare_data_for_multi_class_classification(y)
        _, c = y.shape

    m, n_prime = V.shape
    assert n == n_prime

    G = generate_G_matrix(X, V, gama)
    yhad = generate_yhad_matrix(G, W)

    if type_of_problem == 0:
        plot_regression_data(yhad)
    elif type_of_problem == 1: