def __init__(self, filepath):
    """Build the GRU network and restore its weights from ``filepath``.

    The network is created with the fixed hyper-parameters below, placed on
    the first CUDA device when one is available (CPU otherwise), populated
    from the state dict stored at ``filepath`` and switched to eval mode.
    The ready-to-use network is exposed as ``self.model``.

    Args:
        filepath: Path of the saved ``state_dict`` (a ``.pt`` file) that is
            handed to ``torch.load``.
    """
    super().__init__()

    # Architecture constants. ``load_state_dict`` is strict by default, so
    # these must agree with the network the checkpoint was saved from.
    INPUT_DIM, OUTPUT_DIM = 8, 3
    HID_DIM, N_LAYERS, DROPOUT = 24, 2, 0
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Create the model on the selected device.
    model = GRUModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, N_LAYERS, DROPOUT, DEVICE)
    model = model.to(DEVICE)

    # Load the weights. ``map_location`` remaps the stored tensors onto
    # DEVICE; without it a checkpoint written on a GPU machine cannot be
    # deserialized when this code falls back to the CPU.
    model.load_state_dict(torch.load(filepath, map_location=DEVICE))
    model.eval()

    # Set attribute.
    self.model = model
    # self.criterion = nn.MSELoss().to(DEVICE) #nn.SmoothL1Loss().to(DEVICE)
def test_gru_model(positives, negatives, data_fraction=1.0, fp_rate=0.01,
                   lr=0.001, pca_embedding_dim=None, maxlen=50, gru_size=16,
                   batch_size=1024, hidden_size=None, second_gru_size=None,
                   decay=0.0001, epochs=30):
    """Train a ``GRUModel`` and evaluate it at two decision thresholds.

    The negatives are split 80/10/10 into train/dev/test; the positives are
    not split and are used both for training and for evaluation. After
    fitting, the model is evaluated once with a fixed threshold of 0.5 and
    once with a threshold read off the sorted dev-negative predictions so
    that roughly ``fp_rate`` of the dev negatives score above it.

    Args:
        positives: Sequence of positive samples.
        negatives: Sequence of negative samples.
        data_fraction: Leading fraction of ``positives`` and ``negatives``
            that is kept before splitting.
        fp_rate: Target false-positive rate used to pick the second threshold.
        lr, pca_embedding_dim, maxlen, gru_size, batch_size, hidden_size,
        second_gru_size, decay, epochs: Forwarded unchanged to ``GRUModel``.
    """
    positives = positives[:int(data_fraction * len(positives))]
    negatives = negatives[:int(data_fraction * len(negatives))]

    train_end = int(len(negatives) * .8)
    dev_end = int(len(negatives) * .9)
    negatives_train = negatives[0:train_end]
    negatives_dev = negatives[train_end:dev_end]
    negatives_test = negatives[dev_end:]

    print("Split sizes:")
    print(len(positives), len(negatives_train), len(negatives_dev), len(negatives_test))

    model = GRUModel('../data/glove.6B.50d-char.txt', 50, lr=lr,
                     pca_embedding_dim=pca_embedding_dim, maxlen=maxlen,
                     gru_size=gru_size, batch_size=batch_size,
                     hidden_size=hidden_size, second_gru_size=second_gru_size,
                     decay=decay, epochs=epochs)

    shuffled = shuffle_for_training(negatives_train, positives)
    model.fit(shuffled[0], shuffled[1])

    print(model.model.summary())
    print("Params", model.model.count_params())

    # model.save('model_test.h5')
    # model = load_model('model_test.h5')

    print("Using threshold 0.5")
    threshold = 0.5
    evaluate_model(model, positives, negatives_train, negatives_dev, negatives_test, threshold)

    print("Getting threshold for fp_rate", fp_rate)
    preds = model.predicts(negatives_dev)
    preds.sort()
    # ceil(n * (1 - fp_rate)) equals n whenever n * fp_rate < 1 (small dev
    # set or tiny fp_rate), which would index one past the end. Clamp to the
    # last element so the highest dev score becomes the threshold instead.
    fp_index = min(math.ceil(len(negatives_dev) * (1 - fp_rate)), len(preds) - 1)
    threshold = preds[fp_index]

    print("Using threshold", threshold)
    evaluate_model(model, positives, negatives_train, negatives_dev, negatives_test, threshold)
# Build the data splits. Positives are used whole; negatives are split
# 80/10/10 into train/dev/test. The train slice must stop where the dev slice
# starts, otherwise the dev negatives are also trained on.
positives = dataset['positives']
negatives = dataset['negatives']
train_end = int(len(negatives) * .8)
dev_end = int(len(negatives) * .9)
negatives_train = negatives[0:train_end]
negatives_dev = negatives[train_end:dev_end]
negatives_test = negatives[dev_end:]

# Mix training negatives with the positives and turn them into model inputs.
shuffle = shuffle_for_training(negatives_train, positives)
X, Y, char_indices, indices_char = vectorize_dataset(
    shuffle[0], shuffle[1], max_len)
train_data = CharDataset(X, Y, max_len)
train_dataloader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, num_workers=0)
data_iterator = iter(train_dataloader)

model = GRUModel('./data/glove.6B.50d-char.txt', embedding_dim,
                 char_indices, indices_char)
model.to(device)

# Only parameters that require gradients are handed to the optimizer.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
criterion = nn.BCELoss()

for i in range(epochs):
    running_loss = 0
    for data in tqdm(train_dataloader):
        x, y = data
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        # Drop the trailing dimension so the output lines up with the labels.
        out = model(x).squeeze(-1)
        loss = criterion(out, y)
        # NOTE(review): no optimizer.step() is visible after this call in the
        # reviewed excerpt -- confirm the update happens right after backward().
        loss.backward()
Bits needed 12030733 Hash functions needed 9 False positives 0.0010336374778700803 ''' positives = positives[:] negatives = negatives[:] print(len(positives), len(negatives)) print("Baseline") url_test(positives, negatives, 0.01) print("First attempt to get results, no data balancing") model = GRUModel('../data/glove.6B.50d-char.txt', 50, pca_embedding_dim=16, maxlen=40, gru_size=16, batch_size=8192, lr=0.005, hidden_size=8, epochs=40) test_gru_model(positives, negatives, model, train_dev_fraction=0.95, deeper_bloom=False, fp_rate=0.01) print("deeper bloom version with k = 2") models = [ GRUModel('../data/glove.6B.50d-char.txt', 50,
def test_model(test=False):
    """Train the model and, when requested, write a submission file.

    A ``Model`` is built from a fresh ``Config`` and ``'train.csv'`` and
    passed to ``train_model``; the elapsed training time is printed. With
    ``test`` left falsy the function stops there. Otherwise ``'test.csv'``
    is read, the saved graph and weights under ``./weights/<model_name>``
    are restored into a new session, predictions are produced batch by
    batch, and the six label columns are written to ``'submission.csv'``.

    Args:
        test: When truthy, also run inference on ``test.csv`` and write
            ``submission.csv``.
    """
    label_columns = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]

    config = Config()
    model = Model(config, 'train.csv', debug=False)

    training_started = time.time()
    train_model(model)  ## Save the weights and model

    print()
    print("#" * 20)
    print('Completed Training')
    print('Training Time:{} minutes'.format((time.time() - training_started) / 60))

    if not test:
        return

    test_frame = pd.read_csv('test.csv')
    comments = test_frame['comment_text'].values
    test_idx = test_frame.iloc[:, 0].values

    print("Generating test results ...")
    # NOTE(review): 59 is presumably chosen so the batches cover every test
    # row (see the length check below) -- confirm against get_batches.
    model.config.batch_size = 59

    checkpoint = './weights/%s' % model.config.model_name
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('./weights/%s.meta' % model.config.model_name)
        saver.restore(sess, checkpoint)

        batches, seq_lengths = get_batches(
            X=comments, y=None,
            batch_size=model.config.batch_size, shuffle=False)

        ## Run test in batches
        batch_predictions = [
            sess.run(model.pred,
                     feed_dict=model.build_feeddict(batch, lengths, val=True))
            for batch, lengths in zip(batches, seq_lengths)
        ]

    prediction = np.concatenate(batch_predictions, axis=0)
    assert (len(test_idx) == len(prediction))

    ## Code to write the output submissions to a file
    submission = {'id': test_idx}
    for column_index, label in enumerate(label_columns):
        submission[label] = prediction[:, column_index]

    submit_df = pd.DataFrame(submission)
    submit_df.to_csv('submission.csv', index=False,
                     columns=['id'] + label_columns)
# Split the negatives 80/10/10 into train/dev/test. The train slice must end
# where the dev slice begins, otherwise the dev negatives leak into training
# and the dev-set evaluation below is optimistic.
negatives = dataset['negatives']
train_end = int(len(negatives) * .8)
dev_end = int(len(negatives) * .9)
negatives_train = negatives[0:train_end]
negatives_dev = negatives[train_end:dev_end]
negatives_test = negatives[dev_end:]

if train_mode:
    # Train a fresh model on the shuffled mix of training negatives and positives.
    shuffle = shuffle_for_training(negatives_train, positives)
    X, Y, char_indices, indices_char = vectorize_dataset(
        shuffle[0], shuffle[1], max_len)
    train_data = CharDataset(X, Y, max_len)
    train_dataloader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=True, num_workers=0)
    model = GRUModel('./data/glove.6B.50d-char.txt', embedding_dim,
                     char_indices, indices_char)
    model.to(device)
    train(model, train_dataloader)
else:
    # Reuse a previously saved model, remapped onto the active device.
    # NOTE(review): torch.load unpickles the stored object -- only load
    # files from a trusted source.
    model_path = "./model.h5"
    model = torch.load(model_path, map_location=device)

#model.predicts(negatives_dev, device)
evaluate_model(model, positives, negatives_train, negatives_dev,
               negatives_test, device, threshold=0.5)