예제 #1
0
 def test_inverse_transform_one_cat(self):
     """Decode a one-token, one-label sample with a transformer fitted on the fixture.

     The transformer is fitted on ``self.x``/``self.y``; the labels decoded
     for the separate single-token sample are asserted to differ from the
     fixture labels.
     """
     tokens = [['a']]
     labels = [['O']]
     transformer = IndexTransformer()
     transformer.fit(self.x, self.y)
     # Only the encoded labels are needed; the encoded inputs are discarded.
     _, encoded_labels = transformer.transform(tokens, labels)
     decoded_labels = transformer.inverse_transform(encoded_labels)
     self.assertNotEqual(decoded_labels, self.y)
예제 #2
0
 def test_inverse_transform_unknown_token(self):
     """Decode a sample whose labels include ``'X'`` using a fixture-fitted transformer.

     The transformer is fitted on ``self.x``/``self.y``; the labels decoded
     for the two-token sample are asserted to differ from the fixture labels.
     """
     tokens = [['a', 'b']]
     labels = [['X', 'O']]
     transformer = IndexTransformer()
     transformer.fit(self.x, self.y)
     # Only the encoded labels are needed; the encoded inputs are discarded.
     _, encoded_labels = transformer.transform(tokens, labels)
     decoded_labels = transformer.inverse_transform(encoded_labels)
     self.assertNotEqual(decoded_labels, self.y)
예제 #3
0
def predict_with_folds(swa):
    """Average per-fold model predictions on the test set and build a submission.

    Reads ``test.csv`` from ``config.data_folder``, encodes the
    whitespace-split sentences with the saved ``IndexTransformer``, runs each
    of the ``config.nfolds`` saved models on the encoded input, averages the
    outputs across folds, decodes the averaged prediction and passes the
    result to ``build_submission`` under the name ``'fold'``.

    Args:
        swa: When truthy, the ``..._swa.h5`` checkpoint of every fold is
            loaded instead of the plain ``....h5`` checkpoint.
    """
    test = pd.read_csv(config.data_folder + "test.csv",
                       converters={"pos": literal_eval})
    x_test = [sentence.split() for sentence in test['sentence'].tolist()]

    p = IndexTransformer(use_char=True)
    p = p.load('../models/best_transform.it')
    # Materialise the sentence lengths immediately. On Python 3 a bare
    # ``map`` is a lazy, single-use iterator; it would only be evaluated
    # inside ``inverse_transform`` long after ``x_test`` has been rebound,
    # and it could not be sized or iterated twice.
    lengths = [len(tokens) for tokens in x_test]
    x_test = p.transform(x_test)

    # The custom layers/losses are the same for every fold, so build the
    # mapping once instead of on each iteration.
    custom_objects = {
        'CRF': CRF,
        'RAdam': RAdam,
        'crf_loss': crf_loss,
        'crf_viterbi_accuracy': crf_viterbi_accuracy,
    }

    fold_result = []
    for n_fold in range(config.nfolds):
        path = '../models/best_model_' + str(n_fold)
        if swa:
            path += '_swa'

        model = load_model(path + '.h5', custom_objects=custom_objects)
        fold_result.append(model.predict(x_test, verbose=True))

    # Element-wise mean over the fold axis.
    final_pred = np.mean(fold_result, axis=0)
    y_pred = p.inverse_transform(final_pred, lengths)
    build_submission(y_pred, 'fold')
예제 #4
0
def evaluate(swa):
    """Print the balanced accuracy of the saved fold models on their validation splits.

    Reads ``train.csv`` from ``config.data_folder``, recreates the
    ``KFold`` split (``config.nfolds`` splits, shuffled with
    ``config.seed``), and for every fold loads the matching saved model,
    predicts on that fold's validation sentences and decodes the output
    with the saved ``IndexTransformer``. True and predicted tags are
    flattened across all folds and scored with ``balanced_accuracy_score``.

    Args:
        swa: When truthy, the ``..._swa.h5`` checkpoint of every fold is
            loaded instead of the plain ``....h5`` checkpoint.
    """
    train = pd.read_csv(config.data_folder + "train.csv",
                        converters={"pos": literal_eval, "tag": literal_eval})
    x_train = [sentence.split() for sentence in train['sentence'].tolist()]
    y_train = train['tag'].tolist()

    p = IndexTransformer(use_char=True)
    p = p.load('../models/best_transform.it')

    # The custom layers/losses are the same for every fold, so build the
    # mapping once instead of on each iteration.
    custom_objects = {
        'CRF': CRF,
        'RAdam': RAdam,
        'crf_loss': crf_loss,
        'crf_viterbi_accuracy': crf_viterbi_accuracy,
    }

    oof_data = []
    oof_data_pred = []

    skf = KFold(n_splits=config.nfolds, random_state=config.seed, shuffle=True)

    for n_fold, (_, val_indices) in enumerate(skf.split(x_train)):
        # Select the validation rows with plain list indexing. Wrapping the
        # ragged token/tag lists in ``np.array`` raises ``ValueError`` on
        # recent NumPy releases, and for equal-length sentences it would
        # silently produce a 2-D string array instead of a list of lists.
        x_val = [x_train[i] for i in val_indices]
        y_val = [y_train[i] for i in val_indices]
        print(y_val[:5])
        oof_data.extend([tag for line in y_val for tag in line])
        print(oof_data[:5])
        # Materialise the lengths before ``x_val`` is rebound; a bare
        # ``map`` is a lazy, single-use iterator on Python 3.
        lengths = [len(tokens) for tokens in x_val]
        x_val = p.transform(x_val)

        path = '../models/best_model_' + str(n_fold)
        if swa:
            path += '_swa'

        model = load_model(path + '.h5', custom_objects=custom_objects)

        y_pred = model.predict(x_val,
                               verbose=True)
        print(y_pred[:5])
        y_pred = p.inverse_transform(y_pred, lengths)
        print(y_pred[:5])
        oof_data_pred.extend([pred for line in y_pred for pred in line])
        print(oof_data_pred[:5])

    bacc = balanced_accuracy_score(oof_data, oof_data_pred)
    print("Final CV: ", bacc*100)
예제 #5
0
 def test_inverse_transform(self):
     """Check that decoding the fitted labels reproduces the fixture labels.

     The third element of the transformed inputs is passed to
     ``inverse_transform`` as the lengths argument.
     """
     transformer = IndexTransformer()
     # The transformed inputs unpack into three parts; only the last is used.
     (_, _, length), encoded_labels = transformer.fit_transform(self.x, self.y)
     decoded_labels = transformer.inverse_transform(encoded_labels, length)
     self.assertEqual(decoded_labels, self.y)
 def test_inverse_transform(self):
     """Check that decoding the fitted labels reproduces the fixture labels.

     The lengths passed to ``inverse_transform`` are taken from the label
     sequences in ``self.y``.
     """
     # NOTE(review): a method with this same name is defined directly above;
     # if both live in one class, this definition replaces the earlier one.
     transformer = IndexTransformer()
     _, encoded_labels = transformer.fit_transform(self.x, self.y)
     label_lengths = map(len, self.y)
     decoded_labels = transformer.inverse_transform(encoded_labels, label_lengths)
     self.assertEqual(decoded_labels, self.y)