def run_classification_test(mat, true_labels, binarize=True, percentage_train=0.8,
                            print_train_test_set_stat=True, test_thresholds=False,
                            random_seed=None, d_args=None, d_triples=None):
    """Split the data into train and test parts, run ``classify`` and return its result.

    Steps, in order:

    1. If ``binarize`` is truthy, ``mat`` is replaced by
       ``tm.binarize_sparse_matrix(mat)``.
    2. Train and test indexes are obtained from
       ``get_stratified_train_test_indexes(true_labels, percentage_train,
       random_seed)``.
    3. ``split_matrix_to_train_and_test`` cuts the matrix and the labels
       according to those indexes; ``print_train_test_set_stat`` is forwarded
       to it as ``print_stat``.
    4. ``classify`` is called with the four resulting pieces plus
       ``test_thresholds``.

    :param mat: feature matrix handed to the split helper (binarized first
        when requested).
    :param true_labels: labels belonging to ``mat``.
    :param binarize: binarize the whole matrix before splitting.
    :param percentage_train: forwarded to the stratified index helper.
    :param print_train_test_set_stat: forwarded as ``print_stat``.
    :param test_thresholds: forwarded to ``classify``.
    :param random_seed: forwarded to the stratified index helper.
    :param d_args: not used by the active code path; only the disabled
        pre-split index strategy would consume it.
    :param d_triples: not used by the active code path; only the disabled
        triple-based index strategy would consume it.
    :return: whatever ``classify`` returns (bound to ``model`` here).
    """
    # Binarization, when requested, is applied to the full matrix before splitting.
    features = tm.binarize_sparse_matrix(mat) if binarize else mat

    logging.info('preparing train and test set.')

    # Active strategy: stratified split over the labels.  The pre-split
    # (d_args) and triple-based (d_triples) strategies are currently disabled.
    train_indexes, test_indexes = get_stratified_train_test_indexes(
        true_labels, percentage_train, random_seed)

    pieces = split_matrix_to_train_and_test(
        features, true_labels, train_indexes, test_indexes,
        print_stat=print_train_test_set_stat)
    train_mat, test_mat, true_train_labels, true_test_labels = pieces

    return classify(train_mat, test_mat, true_train_labels, true_test_labels,
                    test_thresholds)
def run_classification_test(mat, true_labels, binarize=True, percentage_train=0.8,
                            print_train_test_set_stat=True, test_thresholds=False,
                            random_seed=None, d_args=None, d_triples=None):
    """Run one train/test classification round and hand back the ``classify`` result.

    The matrix is optionally binarized with ``tm.binarize_sparse_matrix``,
    train/test indexes come from ``get_stratified_train_test_indexes``, the
    matrix and labels are cut with ``split_matrix_to_train_and_test`` and the
    four parts are passed to ``classify`` together with ``test_thresholds``.

    Args:
        mat: feature matrix to classify on.
        true_labels: labels belonging to ``mat``.
        binarize: when truthy, binarize the complete matrix before splitting.
        percentage_train: passed through to the stratified index helper.
        print_train_test_set_stat: passed to the split helper as ``print_stat``.
        test_thresholds: passed through to ``classify``.
        random_seed: passed through to the stratified index helper.
        d_args: currently unused; kept in the signature for the disabled
            pre-split index strategy.
        d_triples: currently unused; kept in the signature for the disabled
            triple-based index strategy.

    Returns:
        The value returned by ``classify``.
    """
    if binarize:
        # Applied to the whole matrix, i.e. before the train/test cut.
        mat = tm.binarize_sparse_matrix(mat)

    logging.info('preparing train and test set.')

    # Only the stratified strategy is active; the alternatives based on
    # d_args / d_triples are switched off.
    index_pair = get_stratified_train_test_indexes(true_labels, percentage_train, random_seed)
    train_indexes, test_indexes = index_pair

    (train_mat,
     test_mat,
     true_train_labels,
     true_test_labels) = split_matrix_to_train_and_test(
        mat, true_labels, train_indexes, test_indexes,
        print_stat=print_train_test_set_stat)

    model = classify(train_mat, test_mat, true_train_labels, true_test_labels, test_thresholds)
    return model
_train_idxes = np.delete(np.arange(0,len(_d_triples)), w1w2_idxs); _test_idxes = w1w2_idxs; print('Testing Context - ArgL - ArgR triples: \n{}'.format('\n'.join(['{} - {}'.format(i, _d_triples.get_triple(i)) for i in w1w2_idxs]))); else: if 's' in _w1: _train_idxes, _test_idxes, _zero_v_idxes = tc.get_stratified_train_test_indexes_notzero(_mat,true_labels, percentage_train=0.8, random_seed=623519); # _test_idxes = np.hstack((_test_idxes, _zero_v_idxes)); if 'd' in _w1: _train_idxes, _test_idxes, _zero_v_idxes = tc.get_fully_delex_train_test_indices_from_triples_notzero(_d_triples, _mat, true_labels, percentage_train_vocabulary=0.5, random_seed=623519); # _test_idxes = np.hstack((_test_idxes, _zero_v_idxes)); _in = raw_input('Binarize feature matrix? ([{}]es, [n]o, type q! to quit): '.format('\033[4m\033[1my\033[0m')); if _in == 'q!': raise KeyboardInterrupt(); if not _in.strip() or _in.strip().lower() == 'y': _mat = tm.binarize_sparse_matrix(_mat); _mat_train = _mat[_train_idxes,:]; _train_labels = true_labels[_train_idxes]; _mat_test = _mat[_test_idxes,:]; _test_labels = true_labels[_test_idxes]; predicted_test_labels, model = tc.clazzify(_mat_train, _mat_test, _train_labels); sorted_idxs = np.argsort(np.abs(model.coef_[0]))[::-1]; # sort and reverse indices, model.coef_ is just a (1 x n) matrix print('Coefficients:\n\t{}\n\t{}'.format(model.intercept_[0], '\n\t'.join(['{:+.3f} {:6d} {}'.format(model.coef_[0][i], i, _colheader[i]) for i in sorted_idxs[:20]]))); _in = raw_input('Enter y to predict {} zero-vector(s) with default class (0) (press <Enter> or n to not classify zero-vectors, type q! to quit): '.format(len(_zero_v_idxes))); if _in == 'q!': raise KeyboardInterrupt(); if _in.strip().lower() == 'y' and len(_zero_v_idxes) > 0: _test_idxes = np.hstack((_test_idxes, _zero_v_idxes));