def predict_model(experiment, output_dir):
    """Write the validation-set predictions of a trained model to a TSV file.

    Loads the saved model for ``experiment``, rebuilds the validation split,
    predicts on the validation dataloader, writes one ``input / true / pred``
    row per prediction to ``<output_dir>/<experiment>_pred.txt`` and finally
    records the evaluation scores.

    Args:
        experiment: Experiment name; selects the saved-model directory and
            prefixes the output file name.
        output_dir: Directory that receives the predictions file.
    """
    saved_dir = "/home2/preetmhn/clms/ling_575_nlms/models/saved_{}".format(experiment)
    model = torch.load('{}/hate_speech_model_trained.pt'.format(saved_dir))
    settings = Settings(experiment, True)

    # get gpu
    device = get_gpu(settings)

    # get data, split with the same random seed as in training
    input_ids, labels, attention_masks = prepare_data(settings)
    _, validation_inputs, _, validation_labels = train_test_split(
        input_ids, labels, random_state=2018, test_size=0.1)
    # NOTE(review): assumes split_data produces the same validation split (and
    # order) as the train_test_split call above - confirm against split_data.
    _, validation_dataloader = split_data(settings, input_ids, labels, attention_masks)

    # make predictions
    settings.write_debug("Getting model predictions")
    preds = predict(device, model, validation_dataloader)

    # load tokenizer for the decoding
    tokenizer = load_bert_tokenizer(settings, True)

    # write to file; the context manager guarantees the handle is flushed and
    # closed, and newline='' is what the csv module expects from its file.
    settings.write_debug("Writing model predictions")
    output_file = os.path.join(output_dir, experiment + '_pred.txt')
    with open(output_file, 'w+', encoding='utf-8', newline='') as handle:
        out = csv.writer(handle, delimiter='\t')
        out.writerow(['input', 'true', 'pred'])
        for i, pred in enumerate(preds):
            # Predictions come from the validation dataloader, so each one is
            # paired with the validation inputs/labels rather than with the
            # first rows of the full dataset.
            tokens = tokenizer.decode(validation_inputs[i], skip_special_tokens=True)
            out.writerow([tokens, validation_labels[i], pred])

    # write scores
    settings.write_debug("Getting test evaluation")
    record_score_information(settings, validation_labels, preds)
def pred_label(f_pred, prepare_data, options, iterator, kb_dict):
    """Collect the predictions for every batch yielded by ``iterator``.

    Args:
        f_pred: Callable invoked as
            ``f_pred(x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att)``; its
            result must provide ``tolist()``.
        prepare_data: Callable invoked as
            ``prepare_data(x1, x2, x1_lemma, x2_lemma, y, options, kb_dict)``
            that returns the 8-tuple
            ``(x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att, y)``.
        options: Forwarded unchanged to ``prepare_data``.
        iterator: Iterable of ``(x1, x2, x1_lemma, x2_lemma, y)`` batches.
        kb_dict: Forwarded unchanged to ``prepare_data``.

    Returns:
        A flat list with the predictions of all batches, in iteration order.
    """
    labels = []
    for x1, x2, x1_lemma, x2_lemma, y in iterator:
        x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att, y = prepare_data(
            x1, x2, x1_lemma, x2_lemma, y, options, kb_dict)
        preds = f_pred(x1, x1_mask, x1_kb, x2, x2_mask, x2_kb, kb_att)
        # extend() appends in place; rebuilding the list with ``+`` on every
        # batch would copy all previously collected labels each time.
        labels.extend(preds.tolist())
    return labels
def pred_label(f_pred, prepare_data, iterator):
    """Collect the predictions for every batch yielded by ``iterator``.

    Args:
        f_pred: Callable invoked as ``f_pred(x1, x1_mask, x2, x2_mask)``; its
            result must provide ``tolist()``.
        prepare_data: Callable invoked as ``prepare_data(x1, x2, y)`` that
            returns the 5-tuple ``(x1, x1_mask, x2, x2_mask, y)``.
        iterator: Iterable of ``(x1, x2, y)`` batches.

    Returns:
        A flat list with the predictions of all batches, in iteration order.
    """
    labels = []
    for x1, x2, y in iterator:
        x1, x1_mask, x2, x2_mask, y = prepare_data(x1, x2, y)
        preds = f_pred(x1, x1_mask, x2, x2_mask)
        # extend() appends in place; rebuilding the list with ``+`` on every
        # batch would copy all previously collected labels each time.
        labels.extend(preds.tolist())
    return labels
def sanity_check_for_prepare_data():
    """Smoke-test prepare_data(): load one batch and display its first sample."""
    print("Running check for function prepare_data()...")
    loader = prepare_data(batch_size=1)
    print("Length of dataloader: ", len(loader))

    # First element of the first field of the first batch.
    first_batch = next(iter(loader))
    image = first_batch[0][0]
    print("Shape of the sample image: ", image.shape)

    plt.figure()
    # Axis order (1, 2, 0) moves the leading axis to the end before plotting.
    displayable = np.transpose(image.detach(), (1, 2, 0))
    plt.imshow(displayable)
    plt.show()
    print("Passed!")
def sanity_check_for_train_hologan(use_gpu=True):
    """Smoke-test HoloGAN training by running three epochs.

    Builds the network and its two Adam optimizers, logs one sample batch to
    TensorBoard under ``logs/fit/<timestamp>`` and then calls
    ``train_one_epoch`` three times.

    Args:
        use_gpu: Run on CUDA when True and a CUDA device is available;
            otherwise run on the CPU.
    """
    now = datetime.now()
    subsample = None
    if use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(use_gpu)

    hologan = HoloGAN.Net(128, (3, 128, 128)).to(device)
    init_layers_serious(hologan)
    criterion = HoloGAN.compute_loss
    dataloader = prepare_data(batch_size=8, subsample=subsample)
    optim_G = optim.Adam(hologan.G.parameters())
    optim_D = optim.Adam(hologan.D.parameters())

    # Setup tensorboard
    logdir = "logs/fit/" + now.strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(logdir)
    try:
        # next() is the portable way to advance an iterator; the ``.next()``
        # method only exists on Python 2 style iterators.
        images, _ = next(iter(dataloader))
        img_grid = make_grid(images)
        writer.add_image("sample_batch", img_grid)

        for epoch in range(3):
            train_one_epoch(dataloader, hologan, criterion, optim_G, optim_D,
                            device, writer, epoch=epoch, print_step=20)
    finally:
        # The writer is handed to train_one_epoch, so it is closed only after
        # training instead of before it.
        writer.close()
def pred_label(f_prods, prepare_data, options, iterator, word_idict):
    """Predict a label for every sample and print the overall accuracy.

    Args:
        f_prods: Callable invoked with
            ``(x1, x1_mask, char_x1, char_x1_mask, x2, x2_mask, char_x2,
            char_x2_mask)``; its result must support ``argmax(axis=1)``.
        prepare_data: Callable invoked as
            ``prepare_data(x1_, x2_, y_, word_idict)`` that returns the eight
            inputs above followed by ``y``.
        options: Unused; kept for interface compatibility.
        iterator: Iterable of ``(x1_, x2_, y_)`` batches.
        word_idict: Forwarded unchanged to ``prepare_data``.

    Returns:
        A flat list with the predicted label of every sample, in iteration
        order.
    """
    labels = []
    valid_acc = 0
    n_done = 0
    for x1_, x2_, y_ in iterator:
        n_done += len(x1_)
        (x1, x1_mask, char_x1, char_x1_mask,
         x2, x2_mask, char_x2, char_x2_mask, y) = prepare_data(
            x1_, x2_, y_, word_idict)
        prods = f_prods(x1, x1_mask, char_x1, char_x1_mask,
                        x2, x2_mask, char_x2, char_x2_mask)
        preds = prods.argmax(axis=1)
        valid_acc += (preds == y).sum()
        # extend() appends in place instead of copying the list every batch.
        labels.extend(preds.tolist())

    # An empty iterator leaves n_done at 0; report 0.0 instead of dividing by zero.
    valid_acc = 1.0 * valid_acc / n_done if n_done else 0.0
    # Single-argument print with %-formatting emits the same text under both
    # the Python 2 print statement and the Python 3 print function.
    print("total sampel %s" % n_done)
    print("Acc %s" % valid_acc)
    return labels
def forward(self, x):
    """Performs the forward pass.

    Embeds ``x``, feeds the embeddings through ``self.recurrent_module`` and
    applies ``self.seq_modules`` to the last entry (index ``-1``) of the
    recurrent output.

    Args:
        x: Input handed directly to ``self.embeddings``.

    Returns:
        The result of ``self.seq_modules``.
    """
    emb = self.embeddings(x)
    # The second value returned by the recurrent module is discarded.
    recurrent_output, _ = self.recurrent_module(emb)
    # NOTE(review): [-1] presumably picks the final time step, which assumes
    # the recurrent output is time-major - TODO confirm.
    output = self.seq_modules(recurrent_output[-1])
    return output


# Script entry point. NOTE(review): this section is cut off in the visible
# source (it ends in the middle of the main(...) call), so it is only annotated.
if __name__ == '__main__':
    # Seed both NumPy and PyTorch with the same SEED.
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    train_dataloader, valid_dataloader, \
        test_dataloader, embeddings = prepare_data()  # freeze=False

    # Active configuration; the commented lines below are alternative setups.
    model = Recurrent(embeddings, nn.RNN)
    # model = Recurrent(embeddings, nn.RNN, num_layers=3)
    # model = Recurrent(embeddings, nn.RNN, num_layers=3, dropout=0.4)
    # model = Recurrent(embeddings, nn.LSTM)
    # model = Recurrent(embeddings, nn.LSTM, activation=nn.LeakyReLU)
    # model = Recurrent(embeddings, nn.GRU)
    # model = Recurrent(embeddings, nn.GRU, num_layers=1)
    # model = Recurrent(embeddings, nn.GRU, hidden_size=200)
    # model = Recurrent(embeddings, nn.GRU, bidirectional=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

    main(model,
def sanity_check_for_train_discriminator(use_gpu=False):
    """Smoke-test discriminator training on real images for ten epochs.

    Builds the discriminator, logs one sample batch and the model graph to
    TensorBoard under ``logs/fit/<timestamp>``, then trains with ``BCELoss``
    against the "real" label on the ``d_gan`` and ``d_style`` outputs. Every
    tenth batch the running mean loss and a prediction figure are logged.

    Args:
        use_gpu: Run on CUDA when True and a CUDA device is available;
            otherwise run on the CPU.
    """
    now = datetime.now()
    subsample = 80
    if use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(use_gpu)

    netD = Discriminator(128, (3, 128, 128), spec_norm=spectral_norm,
                         norm_layer=nn.InstanceNorm2d).to(device)
    criterion = nn.BCELoss()
    real_label = 1
    opt = optim.Adam(netD.parameters())
    dataloader = prepare_data(batch_size=8, subsample=subsample)

    # Setup tensorboard
    logdir = "logs/fit/" + now.strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(logdir)
    try:
        # next() is the portable way to advance an iterator; the ``.next()``
        # method only exists on Python 2 style iterators.
        images, _ = next(iter(dataloader))
        img_grid = make_grid(images)
        writer.add_image("sample_batch", img_grid)
        writer.add_graph(netD, images.to(device))

        running_loss = 0.0
        for epoc in range(10):
            print("\nEpoch: ", epoc + 1)
            for i_batch, data in enumerate(Bar(dataloader)):
                netD.zero_grad()
                real_img = data[0].to(device)
                # d_id is returned by the discriminator but not used in this check.
                d_gan, d_id, d_style = netD(real_img)

                label = torch.full(d_gan.shape, real_label,
                                   dtype=torch.float, device=device)
                errD_real = criterion(d_gan, label)
                label = torch.full(d_style.shape, real_label,
                                   dtype=torch.float, device=device)
                errD_real += criterion(d_style, label)
                errD_real.backward()
                opt.step()

                running_loss += errD_real.item()
                if i_batch % 10 == 9:
                    # Mean loss over the last ten batches.
                    writer.add_scalar("training loss", running_loss / 10,
                                      epoc * subsample + i_batch)
                    writer.add_figure(
                        "predictions",
                        functional.plot_classes_preds(netD, real_img))
                    running_loss = 0.0
    finally:
        # The training loop logs to the writer, so it is closed only once
        # training has finished (or failed), not before the loop starts.
        writer.close()
def test_prepare_data():
    """Check the boolean that prepare_data returns for a plain name and an S3 URI."""
    expectations = (
        ("location", False),
        ("s3://mybucket/data/", True),
    )
    for location, expected in expectations:
        assert prepare_data(location) is expected
def test_count_chains():
    """Check count_possible_chains on a fixed, prepared input list."""
    raw_values = [16, 10, 15, 5, 1, 11, 7, 19, 6, 12, 4]
    prepared = prepare_data(raw_values)
    chain_total = count_possible_chains(prepared, 0)
    assert chain_total == 8
from plotter import plot_two_class_classification, plot_regression_data, plot_multi_class_classification if __name__ == '__main__': path_to_test_data = str(sys.argv[1]) nol = int(sys.argv[2]) # number of lines of test data type_of_problem = sys.argv[3] radius_scale = float(sys.argv[4]) sys.argv[4] = sys.argv[3] W = np.load("outputs/weights" + type_of_problem + ".npy") V = np.load("outputs/V" + type_of_problem + ".npy") gama = np.load("outputs/gama" + type_of_problem + ".npy") type_of_problem = int(type_of_problem) X, y, scales = prepare_data(number_of_lines=nol, path=path_to_test_data) l, n = X.shape c = 0 if type_of_problem == 2: y = prepare_data_for_multi_class_classification(y) _, c = y.shape m, n_prime = V.shape assert n == n_prime G = generate_G_matrix(X, V, gama) yhad = generate_yhad_matrix(G, W) if type_of_problem == 0: plot_regression_data(yhad) elif type_of_problem == 1: