# Demo scripts and unit tests for the ImpartialTextClassifier. The imports below
# cover all scripts in this listing; the module paths of the project-internal
# helpers (ImpartialTextClassifier, parse_hidden_layers_description, load_data,
# load_unlabeled_texts, read_csv, print_classes_distribution, read_snips2017_data,
# read_dstc2_data, generate_random_samples, load_reuters_corpus, load_brown_corpus,
# is_string) are assumed to match the project's package layout.
import os
import pickle
import random
import re
import time
from argparse import ArgumentParser

import numpy as np
from sklearn.metrics import classification_report
from skopt import gp_minimize
from skopt.space import Integer, Real


# Demo: train the classifier on a prepared train/test split (or load a previously
# saved model) and report the test F1-macro.
def main():
    parser = ArgumentParser()
    parser.add_argument('-m', '--model', dest='model_name', type=str, required=True,
                        help='The binary file with the text classifier.')
    parser.add_argument('--conv1', dest='size_of_conv1', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 1.')
    parser.add_argument('--conv2', dest='size_of_conv2', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 2.')
    parser.add_argument('--conv3', dest='size_of_conv3', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 3.')
    parser.add_argument('--conv4', dest='size_of_conv4', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 4.')
    parser.add_argument('--conv5', dest='size_of_conv5', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 5.')
    parser.add_argument('--hidden', dest='hidden_layer_size', type=str, required=False, default='500',
                        help='Size of each hidden layer and total number of hidden layers '
                             '(separate them with a colon).')
    parser.add_argument('--num_monte_carlo', dest='num_monte_carlo', type=int, required=False, default=10,
                        help='Number of generated Monte Carlo samples for each data sample.')
    parser.add_argument('--batch_size', dest='batch_size', type=int, required=False, default=16,
                        help='Size of mini-batch.')
    parser.add_argument('--gpu_frac', dest='gpu_memory_frac', type=float, required=False, default=0.9,
                        help='Fraction of GPU memory allocated to the classifier.')
    parser.add_argument('--nn_type', dest='nn_type', type=str, choices=['bayesian', 'usual'],
                        required=False, default='bayesian',
                        help='Neural network type: `bayesian` or `usual`.')
    args = parser.parse_args()

    model_name = os.path.normpath(args.model_name)
    if os.path.isfile(model_name):
        # Re-use a previously trained classifier instead of training a new one.
        with open(model_name, 'rb') as fp:
            nn = pickle.load(fp)
    else:
        hidden_layer_size, n_hidden_layers = parse_hidden_layers_description(args.hidden_layer_size)
        train_texts, train_labels = load_data('train')
        print('Number of samples for training is {0}.'.format(len(train_texts)))
        nn = ImpartialTextClassifier(
            filters_for_conv1=args.size_of_conv1, filters_for_conv2=args.size_of_conv2,
            filters_for_conv3=args.size_of_conv3, filters_for_conv4=args.size_of_conv4,
            filters_for_conv5=args.size_of_conv5, hidden_layer_size=hidden_layer_size,
            n_hidden_layers=n_hidden_layers, batch_size=args.batch_size,
            num_monte_carlo=args.num_monte_carlo, gpu_memory_frac=args.gpu_memory_frac,
            verbose=True, multioutput=False, random_seed=42, validation_fraction=0.15,
            max_epochs=100, patience=5, bayesian=(args.nn_type == 'bayesian'),
            kl_weight_init=0.05, kl_weight_fin=0.05
        )
        nn.fit(train_texts, train_labels)
        print('')
        with open(model_name, 'wb') as fp:
            pickle.dump(nn, fp)
    test_texts, test_labels = load_data('test')
    print('')
    print('Number of samples for final testing is {0}.'.format(len(test_texts)))
    print('Test F1-macro is {0:.2%}.'.format(nn.score(test_texts, test_labels)))
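
# Once a script like the one above has pickled its model, the classifier can be
# re-used without retraining. A minimal sketch ('classifier.pkl' is a hypothetical
# file name; pass whatever was given via -m/--model):
def load_and_predict_example(model_path='classifier.pkl'):
    with open(model_path, 'rb') as fp:
        nn = pickle.load(fp)
    # `predict` is the same method the demo scripts below call on held-out texts.
    return nn.predict(['play some jazz music in the kitchen'])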
# Excerpted unittest.TestCase methods that pin down the contract of
# parse_hidden_layers_description.
def test_parse_hidden_layers_description_negative04(self):
    src = '100:-1'
    true_err_msg = re.escape('`100:-1` is wrong description of hidden layers!')
    with self.assertRaisesRegex(ValueError, true_err_msg):
        _ = parse_hidden_layers_description(src)
def test_parse_hidden_layers_description_negative01(self):
    src = ':'
    true_err_msg = re.escape('Description of hidden layers is empty!')
    with self.assertRaisesRegex(ValueError, true_err_msg):
        _ = parse_hidden_layers_description(src)
def test_parse_hidden_layers_description_positive07(self):
    src = None
    true_res = (0, 0)
    self.assertEqual(true_res, parse_hidden_layers_description(src))
def test_parse_hidden_layers_description_positive02(self):
    src = '100:3'
    true_res = (100, 3)
    self.assertEqual(true_res, parse_hidden_layers_description(src))
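
# The tests above pin down the contract of parse_hidden_layers_description:
# '100:3' -> (100, 3), None -> (0, 0), ':' raises ValueError('Description of
# hidden layers is empty!'), and '100:-1' raises ValueError('`...` is wrong
# description of hidden layers!'). A minimal sketch consistent with those tests
# (the real implementation lives in the project's utility module and may differ):
def parse_hidden_layers_description(src):
    if src is None:
        return 0, 0
    parts = [cur.strip() for cur in src.split(':') if len(cur.strip()) > 0]
    if len(parts) == 0:
        raise ValueError('Description of hidden layers is empty!')
    try:
        values = [int(cur) for cur in parts]
    except ValueError:
        values = []
    if (len(values) == 0) or (len(values) > 2) or any(map(lambda it: it < 0, values)):
        raise ValueError('`{0}` is wrong description of hidden layers!'.format(src))
    hidden_layer_size = values[0]
    # A single value such as '500' means one hidden layer of that size.
    n_hidden_layers = values[1] if len(values) > 1 else 1
    return hidden_layer_size, n_hidden_layers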
# Demo: cross-validated training on labeled CSV data (optionally mixed with
# unlabeled texts), with optional Bayesian hyperparameter search via scikit-optimize.
def main():
    def func(args):
        # Objective for the hyperparameter search: mean quality over the CV folds,
        # negated because gp_minimize looks for a minimum.
        conv1_ = int(args[0])
        conv2_ = int(args[1])
        conv3_ = int(args[2])
        conv4_ = int(args[3])
        conv5_ = int(args[4])
        hidden_layer_size_ = int(args[5])
        n_hidden_layers_ = int(args[6])
        if (n_hidden_layers_ == 0) or (hidden_layer_size_ == 0):
            hidden_layer_size_ = 0
            n_hidden_layers_ = 0
        quality = 0.0
        print('Filters number for different convolution kernels: ({0}, {1}, {2}, {3}, {4})'.format(
            conv1_, conv2_, conv3_, conv4_, conv5_))
        if n_hidden_layers_ > 0:
            print('Hidden layer size is {0}.'.format(hidden_layer_size_))
            print('Number of hidden layers is {0}.'.format(n_hidden_layers_))
        if nn_type == 'bayesian':
            init_kl_weight = float(args[7])
            fin_kl_weight = float(args[8])
            print('Initial KL weight is {0:.6f}.'.format(init_kl_weight))
            print('Final KL weight is {0:.6f}.'.format(fin_kl_weight))
        else:
            init_kl_weight = 1.0
            fin_kl_weight = 1.0
        if sum(args) == 0:
            # A fully degenerate configuration cannot be trained: return the worst value.
            return 1.0
        for fold_idx, (train_index, test_index) in enumerate(indices_for_cv):
            cls = ImpartialTextClassifier(
                bert_hub_module_handle=(None if os.path.exists(os.path.normpath(bert_handle)) else bert_handle),
                filters_for_conv1=conv1_, filters_for_conv2=conv2_, filters_for_conv3=conv3_,
                filters_for_conv4=conv4_, filters_for_conv5=conv5_,
                hidden_layer_size=hidden_layer_size_, n_hidden_layers=n_hidden_layers_,
                multioutput=multioutput, gpu_memory_frac=gpu_memory_frac,
                num_monte_carlo=num_monte_carlo, verbose=False, random_seed=42,
                max_epochs=100, patience=5, batch_size=16, bayesian=(nn_type == 'bayesian'),
                kl_weight_init=init_kl_weight, kl_weight_fin=fin_kl_weight
            )
            if os.path.exists(os.path.normpath(bert_handle)):
                cls.PATH_TO_BERT = os.path.normpath(bert_handle)
            train_texts = labeled_texts[train_index]
            train_labels = labels[train_index]
            train_index_, val_index = cls.train_test_split(train_labels, 0.1)
            val_texts = train_texts[val_index]
            val_labels = train_labels[val_index]
            if unlabeled_texts_for_training is None:
                train_texts = train_texts[train_index_]
                train_labels = train_labels[train_index_]
            else:
                # Unlabeled texts get the special label -1 ("unknown").
                train_texts = np.concatenate((train_texts[train_index_], unlabeled_texts_for_training))
                train_labels = np.concatenate((
                    train_labels[train_index_],
                    np.full(shape=(len(unlabeled_texts_for_training),), fill_value=-1, dtype=np.int32)
                ))
            cls.fit(train_texts, train_labels, validation_data=(val_texts, val_labels))
            del train_texts, train_labels, val_texts, val_labels, train_index_, val_index
            if unlabeled_texts_for_testing is None:
                texts_for_final_testing = labeled_texts[test_index]
                labels_for_final_testing = labels[test_index]
            else:
                texts_for_final_testing = np.concatenate((labeled_texts[test_index], unlabeled_texts_for_testing))
                labels_for_final_testing = np.concatenate((
                    labels[test_index],
                    np.full(shape=(len(unlabeled_texts_for_testing),), fill_value=-1, dtype=np.int32)
                ))
            instant_quality = cls.score(texts_for_final_testing, labels_for_final_testing)
            quality += instant_quality
            print('Fold {0}: {1:.6f}.'.format(fold_idx + 1, instant_quality))
            del cls, texts_for_final_testing, labels_for_final_testing
        quality /= float(len(indices_for_cv))
        print('Total quality = {0:.6f}.'.format(quality))
        print('')
        return -quality

    def score(args):
        # Re-run cross-validation with the chosen hyperparameters and print detailed
        # classification reports instead of a single quality value.
        conv1_ = int(args[0])
        conv2_ = int(args[1])
        conv3_ = int(args[2])
        conv4_ = int(args[3])
        conv5_ = int(args[4])
        hidden_layer_size_ = int(args[5])
        n_hidden_layers_ = int(args[6])
        if (n_hidden_layers_ == 0) or (hidden_layer_size_ == 0):
            hidden_layer_size_ = 0
            n_hidden_layers_ = 0
        print('Optimal filters number for different convolution kernels: ({0}, {1}, {2}, {3}, {4})'.format(
            conv1_, conv2_, conv3_, conv4_, conv5_))
        if n_hidden_layers_ > 0:
            print('Optimal size of the hidden layer is {0}.'.format(hidden_layer_size_))
            print('Optimal number of hidden layers is {0}.'.format(n_hidden_layers_))
        if nn_type == 'bayesian':
            init_kl_weight = float(args[7])
            fin_kl_weight = float(args[8])
            print('Optimal value of initial KL weight is {0:.6f}.'.format(init_kl_weight))
            print('Optimal value of final KL weight is {0:.6f}.'.format(fin_kl_weight))
        else:
            init_kl_weight = 1.0
            fin_kl_weight = 1.0
        print('')
        y_pred = []
        y_true = []
        unlabeled_is_added = False
        for train_index, test_index in indices_for_cv:
            cls = ImpartialTextClassifier(
                bert_hub_module_handle=(None if os.path.exists(os.path.normpath(bert_handle)) else bert_handle),
                filters_for_conv1=conv1_, filters_for_conv2=conv2_, filters_for_conv3=conv3_,
                filters_for_conv4=conv4_, filters_for_conv5=conv5_,
                hidden_layer_size=hidden_layer_size_, n_hidden_layers=n_hidden_layers_,
                batch_size=16, gpu_memory_frac=gpu_memory_frac, verbose=True, random_seed=42,
                num_monte_carlo=num_monte_carlo, max_epochs=100, patience=5,
                multioutput=multioutput, bayesian=(nn_type == 'bayesian'),
                kl_weight_init=init_kl_weight, kl_weight_fin=fin_kl_weight
            )
            if os.path.exists(os.path.normpath(bert_handle)):
                cls.PATH_TO_BERT = os.path.normpath(bert_handle)
            train_texts = labeled_texts[train_index]
            train_labels = labels[train_index]
            train_index_, val_index = cls.train_test_split(train_labels, 0.1)
            val_texts = train_texts[val_index]
            val_labels = train_labels[val_index]
            if unlabeled_texts_for_training is None:
                train_texts = train_texts[train_index_]
                train_labels = train_labels[train_index_]
            else:
                train_texts = np.concatenate((train_texts[train_index_], unlabeled_texts_for_training))
                train_labels = np.concatenate((
                    train_labels[train_index_],
                    np.full(shape=(len(unlabeled_texts_for_training),), fill_value=-1, dtype=np.int32)
                ))
            cls.fit(train_texts, train_labels, validation_data=(val_texts, val_labels))
            print('')
            del train_texts, train_labels, val_texts, val_labels, train_index_, val_index
            if (not unlabeled_is_added) and (unlabeled_texts_for_testing is not None):
                # Add the held-out unlabeled texts once, on the first fold only.
                y_pred.append(cls.predict(unlabeled_texts_for_testing))
                unlabeled_is_added = True
                y_true.append(np.full(shape=(len(unlabeled_texts_for_testing),), fill_value=-1, dtype=np.int32))
            y_pred.append(cls.predict(labeled_texts[test_index]))
            y_true.append(labels[test_index])
            del cls
        y_pred = np.concatenate(y_pred)
        y_true = np.concatenate(y_true)
        print('')
        if multioutput:
            # Report each class as a separate binary (one-vs-rest) problem.
            for class_idx in range(len(classes_list)):
                y_true_ = np.zeros((len(y_true),), dtype=np.int32)
                y_pred_ = np.zeros((len(y_pred),), dtype=np.int32)
                for sample_idx in range(len(y_true)):
                    if isinstance(y_true[sample_idx], set):
                        if class_idx in y_true[sample_idx]:
                            y_true_[sample_idx] = 1
                    elif class_idx == y_true[sample_idx]:
                        y_true_[sample_idx] = 1
                    if isinstance(y_pred[sample_idx], set):
                        if class_idx in y_pred[sample_idx]:
                            y_pred_[sample_idx] = 1
                    elif class_idx == y_pred[sample_idx]:
                        y_pred_[sample_idx] = 1
                print(classification_report(y_true_, y_pred_, target_names=['OTHER', classes_list[class_idx]],
                                            digits=4))
        else:
            # Map the special label -1 to an explicit UNKNOWN class for the report.
            for sample_idx in range(len(y_true)):
                if y_true[sample_idx] < 0:
                    y_true[sample_idx] = len(classes_list)
                if y_pred[sample_idx] < 0:
                    y_pred[sample_idx] = len(classes_list)
            print(classification_report(y_true, y_pred, target_names=classes_list + ['UNKNOWN'], digits=4))
        print('')

    def train(args) -> ImpartialTextClassifier:
        # Train the final classifier on all labeled data (plus unlabeled texts, if any).
        conv1_ = int(args[0])
        conv2_ = int(args[1])
        conv3_ = int(args[2])
        conv4_ = int(args[3])
        conv5_ = int(args[4])
        hidden_layer_size_ = int(args[5])
        n_hidden_layers_ = int(args[6])
        if (n_hidden_layers_ == 0) or (hidden_layer_size_ == 0):
            hidden_layer_size_ = 0
            n_hidden_layers_ = 0
        if nn_type == 'bayesian':
            init_kl_weight = float(args[7])
            fin_kl_weight = float(args[8])
        else:
            init_kl_weight = 1.0
            fin_kl_weight = 1.0
        train_index, val_index = ImpartialTextClassifier.train_test_split(labels, 0.1)
        if unlabeled_texts_for_training is None:
            train_texts = labeled_texts[train_index]
            train_labels = labels[train_index]
        else:
            train_texts = np.concatenate((labeled_texts[train_index], unlabeled_texts_for_training))
            train_labels = np.concatenate((
                labels[train_index],
                np.full(shape=(len(unlabeled_texts_for_training),), fill_value=-1, dtype=np.int32)
            ))
        val_texts = labeled_texts[val_index]
        val_labels = labels[val_index]
        cls = ImpartialTextClassifier(
            bert_hub_module_handle=(None if os.path.exists(os.path.normpath(bert_handle)) else bert_handle),
            filters_for_conv1=conv1_, filters_for_conv2=conv2_, filters_for_conv3=conv3_,
            filters_for_conv4=conv4_, filters_for_conv5=conv5_,
            hidden_layer_size=hidden_layer_size_, n_hidden_layers=n_hidden_layers_,
            batch_size=16, gpu_memory_frac=gpu_memory_frac, num_monte_carlo=num_monte_carlo,
            verbose=True, random_seed=42, max_epochs=100, patience=5,
            multioutput=multioutput, bayesian=(nn_type == 'bayesian'),
            kl_weight_init=init_kl_weight, kl_weight_fin=fin_kl_weight
        )
        if os.path.exists(os.path.normpath(bert_handle)):
            cls.PATH_TO_BERT = os.path.normpath(bert_handle)
        cls.fit(train_texts, train_labels, validation_data=(val_texts, val_labels))
        del train_texts, train_labels, val_texts, val_labels
        return cls

    parser = ArgumentParser()
    parser.add_argument('-m', '--model', dest='model_name', type=str, required=True,
                        help='The binary file with the text classifier.')
    parser.add_argument('-b', '--bert', dest='bert', type=str, required=False,
                        default='https://tfhub.dev/google/bert_multi_cased_L-12_H-768_A-12/1',
                        help='URL of the TF-Hub BERT model (or path to a BERT model on the local drive).')
    parser.add_argument('-c', '--csv', dest='csv_data_file', type=str, required=True,
                        help='Path to the CSV file with labeled data.')
    parser.add_argument('-t', '--train', dest='train_file_name', type=str, required=False, default='',
                        help='Path to the text file with unlabeled data for training.')
    parser.add_argument('-e', '--test', dest='test_file_name', type=str, required=False, default='',
                        help='Path to the text file with unlabeled data for evaluation.')
    parser.add_argument('--gpu_frac', dest='gpu_memory_frac', type=float, required=False, default=0.9,
                        help='Fraction of GPU memory allocated to the classifier.')
    parser.add_argument('--nn_type', dest='nn_type', type=str, choices=['bayesian', 'usual'],
                        required=False, default='bayesian',
                        help='Neural network type: `bayesian` or `usual`.')
    parser.add_argument('--num_monte_carlo', dest='num_monte_carlo', type=int, required=False, default=100,
                        help='Number of generated Monte Carlo samples for each data sample.')
    parser.add_argument('--conv1', dest='size_of_conv1', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 1.')
    parser.add_argument('--conv2', dest='size_of_conv2', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 2.')
    parser.add_argument('--conv3', dest='size_of_conv3', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 3.')
    parser.add_argument('--conv4', dest='size_of_conv4', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 4.')
    parser.add_argument('--conv5', dest='size_of_conv5', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 5.')
    parser.add_argument('--hidden', dest='hidden_layer_size', type=str, required=False, default='500',
                        help='Size of each hidden layer and total number of hidden layers '
                             '(separate them with a colon).')
    parser.add_argument('--init_kl_weight', dest='init_kl_weight', type=float, required=False, default=1e-1,
                        help='Initial value of the KL weight.')
    parser.add_argument('--fin_kl_weight', dest='fin_kl_weight', type=float, required=False, default=1e-2,
                        help='Final value of the KL weight.')
    parser.add_argument('--search', dest='search_hyperparameters', required=False, action='store_true',
                        default=False,
                        help='Should the hyperparameters be found by Bayesian optimization?')
    cmd_args = parser.parse_args()

    num_monte_carlo = cmd_args.num_monte_carlo
    gpu_memory_frac = cmd_args.gpu_memory_frac
    bert_handle = cmd_args.bert
    nn_type = cmd_args.nn_type
    model_name = os.path.normpath(cmd_args.model_name)
    labeled_data_name = os.path.normpath(cmd_args.csv_data_file)
    unlabeled_train_data_name = cmd_args.train_file_name.strip()
    hidden_layer_size, n_hidden_layers = parse_hidden_layers_description(cmd_args.hidden_layer_size)
    if len(unlabeled_train_data_name) > 0:
        unlabeled_train_data_name = os.path.normpath(unlabeled_train_data_name)
        unlabeled_texts_for_training = load_unlabeled_texts(unlabeled_train_data_name)
        assert len(unlabeled_texts_for_training) > 0, 'File `{0}` is empty!'.format(unlabeled_train_data_name)
    else:
        unlabeled_texts_for_training = None
    unlabeled_test_data_name = cmd_args.test_file_name.strip()
    if len(unlabeled_test_data_name) > 0:
        unlabeled_test_data_name = os.path.normpath(unlabeled_test_data_name)
        unlabeled_texts_for_testing = load_unlabeled_texts(unlabeled_test_data_name)
        assert len(unlabeled_texts_for_testing) > 0, 'File `{0}` is empty!'.format(unlabeled_test_data_name)
    else:
        unlabeled_texts_for_testing = None
    labeled_texts, labels, classes_list = read_csv(labeled_data_name, 7)
    print('Number of labeled texts is {0}.'.format(len(labeled_texts)))
    print('Number of classes is {0}.'.format(len(classes_list)))
    if any(map(lambda it: isinstance(it, set), labels)):
        print('Some data samples correspond to several labels at once.')
        multioutput = True
    else:
        multioutput = False
    print('')
    print_classes_distribution(labels, classes_list)
    np.random.seed(42)
    indices_for_cv = ImpartialTextClassifier.cv_split(labels, 5)
    if cmd_args.search_hyperparameters:
        # Five convolution sizes, the hidden layer size, the number of hidden layers,
        # and (for the Bayesian network) the initial and final KL weights.
        dimensions = [Integer(0, 300), Integer(0, 300), Integer(0, 300), Integer(0, 300), Integer(0, 300),
                      Integer(100, 2000), Integer(0, 3)]
        if nn_type == 'bayesian':
            dimensions += [Real(1e-5, 1.0, prior='log-uniform'), Real(1e-5, 1.0, prior='log-uniform')]
        optimal_res = gp_minimize(func, dimensions=dimensions, n_calls=100, n_random_starts=5,
                                  random_state=42, verbose=False, n_jobs=1)
        print('')
        hyperparameters = optimal_res.x
    else:
        hyperparameters = [cmd_args.size_of_conv1, cmd_args.size_of_conv2, cmd_args.size_of_conv3,
                           cmd_args.size_of_conv4, cmd_args.size_of_conv5, hidden_layer_size,
                           n_hidden_layers, cmd_args.init_kl_weight, cmd_args.fin_kl_weight]
    score(hyperparameters)
    with open(model_name, 'wb') as fp:
        pickle.dump(train(hyperparameters), fp)
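
# The search above uses scikit-optimize's gp_minimize. A self-contained toy sketch
# of that API (the objective below is illustrative only, not the script's `func`):
def gp_minimize_toy_example():
    def toy_objective(params):
        x, y = int(params[0]), float(params[1])
        return (x - 3) ** 2 + (y - 0.5) ** 2  # gp_minimize searches for the minimum

    toy_res = gp_minimize(toy_objective, dimensions=[Integer(0, 10), Real(0.0, 1.0)],
                          n_calls=20, n_random_starts=5, random_state=42)
    return toy_res.x, toy_res.fun  # best parameters and the best objective value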
# Demo: intent detection on SNIPS-2017 with out-of-domain rejection; Reuters and
# Brown corpus texts serve as unlabeled ("unknown") samples.
def main():
    random_seed = 42
    parser = ArgumentParser()
    parser.add_argument('-m', '--model', dest='model_name', type=str, required=True,
                        help='The binary file with the text classifier.')
    parser.add_argument('-d', '--data_dir', dest='data_dir', type=str, required=True,
                        help='Path to the directory with SNIPS-2017 data (see the '
                             '`2017-06-custom-intent-engines` subfolder of the repository '
                             'https://github.com/snipsco/nlu-benchmark).')
    parser.add_argument('--conv1', dest='size_of_conv1', type=int, required=False, default=200,
                        help='Size of the Bayesian convolution layer with kernel size 1.')
    parser.add_argument('--conv2', dest='size_of_conv2', type=int, required=False, default=200,
                        help='Size of the Bayesian convolution layer with kernel size 2.')
    parser.add_argument('--conv3', dest='size_of_conv3', type=int, required=False, default=200,
                        help='Size of the Bayesian convolution layer with kernel size 3.')
    parser.add_argument('--conv4', dest='size_of_conv4', type=int, required=False, default=200,
                        help='Size of the Bayesian convolution layer with kernel size 4.')
    parser.add_argument('--conv5', dest='size_of_conv5', type=int, required=False, default=200,
                        help='Size of the Bayesian convolution layer with kernel size 5.')
    parser.add_argument('--hidden', dest='hidden_layer_size', type=str, required=False, default='500',
                        help='Size of each hidden layer and total number of hidden layers '
                             '(separate them with a colon).')
    parser.add_argument('--num_monte_carlo', dest='num_monte_carlo', type=int, required=False, default=100,
                        help='Number of generated Monte Carlo samples for each data sample.')
    parser.add_argument('--batch_size', dest='batch_size', type=int, required=False, default=64,
                        help='Size of mini-batch.')
    parser.add_argument('--gpu_frac', dest='gpu_memory_frac', type=float, required=False, default=0.9,
                        help='Fraction of GPU memory allocated to the classifier.')
    parser.add_argument('--nn_type', dest='nn_type', type=str,
                        choices=['bayesian', 'usual', 'additional_class'],
                        required=False, default='bayesian',
                        help='Neural network type: `bayesian`, `usual` or `additional_class` '
                             '(the same as `usual`, but unlabeled samples are modeled as an '
                             'additional class).')
    args = parser.parse_args()

    model_name = os.path.normpath(args.model_name)
    data_dir = os.path.normpath(args.data_dir)
    hidden_layer_size, n_hidden_layers = parse_hidden_layers_description(args.hidden_layer_size)
    train_data, val_data, test_data = read_snips2017_data(data_dir)
    print('Classes list: {0}'.format(sorted(list(set(train_data[1])))))
    print('Number of samples for training is {0}.'.format(len(train_data[0])))
    print('Number of samples for validation is {0}.'.format(len(val_data[0])))
    print('Number of samples for final testing is {0}.'.format(len(test_data[0])))
    generate_random_samples(train_data[0], train_data[1])
    print('')
    unlabeled_texts_for_training = load_reuters_corpus()
    unlabeled_texts_for_testing = load_brown_corpus()
    random.seed(random_seed)
    print('Number of unlabeled (unknown) samples for training is {0}. For example:'.format(
        len(unlabeled_texts_for_training)))
    for it in random.sample(unlabeled_texts_for_training, 5):
        print(' {0}'.format(it))
    print('Number of unlabeled (unknown) samples for final testing is {0}. For example:'.format(
        len(unlabeled_texts_for_testing)))
    for it in random.sample(unlabeled_texts_for_testing, 5):
        print(' {0}'.format(it))
    print('')
    if os.path.isfile(model_name):
        with open(model_name, 'rb') as fp:
            nn = pickle.load(fp)
    else:
        if args.nn_type == 'additional_class':
            # Model unlabeled texts as an explicit UNKNOWN class: hold out 15% of
            # them for validation and add the rest to the training set.
            random.shuffle(unlabeled_texts_for_training)
            n = int(round(0.15 * len(unlabeled_texts_for_training)))
            train_texts = train_data[0] + unlabeled_texts_for_training[n:]
            train_labels = train_data[1] + ['UNKNOWN' for _ in range(len(unlabeled_texts_for_training) - n)]
            val_texts = val_data[0] + unlabeled_texts_for_training[:n]
            val_labels = val_data[1] + ['UNKNOWN' for _ in range(n)]
        else:
            train_texts = train_data[0] + unlabeled_texts_for_training
            train_labels = train_data[1] + [-1 for _ in range(len(unlabeled_texts_for_training))]
            val_texts = val_data[0]
            val_labels = val_data[1]
        nn = ImpartialTextClassifier(
            filters_for_conv1=args.size_of_conv1, filters_for_conv2=args.size_of_conv2,
            filters_for_conv3=args.size_of_conv3, filters_for_conv4=args.size_of_conv4,
            filters_for_conv5=args.size_of_conv5, batch_size=args.batch_size,
            hidden_layer_size=hidden_layer_size, n_hidden_layers=n_hidden_layers,
            num_monte_carlo=args.num_monte_carlo, gpu_memory_frac=args.gpu_memory_frac,
            verbose=True, multioutput=False, random_seed=random_seed, validation_fraction=0.15,
            max_epochs=50, patience=5, bayesian=(args.nn_type == 'bayesian'),
            kl_weight_init=1.0, kl_weight_fin=0.001
        )
        nn.fit(train_texts, train_labels, validation_data=(val_texts, val_labels))
        print('')
        with open(model_name, 'wb') as fp:
            pickle.dump(nn, fp)
    test_texts = test_data[0] + unlabeled_texts_for_testing
    test_labels = test_data[1] + ['UNKNOWN' for _ in range(len(unlabeled_texts_for_testing))]
    start_time = time.time()
    if args.nn_type == 'additional_class':
        y_pred = [nn.classes_reverse_index_[class_idx]
                  for class_idx in nn.predict_proba(test_texts).argmax(axis=1)]
    else:
        # Map rejected samples (negative predicted labels) to the UNKNOWN class.
        y_pred_ = nn.predict(test_texts)
        y_pred = []
        for sample_idx in range(len(y_pred_)):
            if is_string(y_pred_[sample_idx]):
                y_pred.append(y_pred_[sample_idx])
            elif y_pred_[sample_idx] < 0:
                y_pred.append('UNKNOWN')
            else:
                y_pred.append(y_pred_[sample_idx])
    end_time = time.time()
    print('Duration of testing is {0:.3f} seconds.'.format(end_time - start_time))
    print('Mean duration of a single test sample recognition is {0:.3f} seconds.'.format(
        (end_time - start_time) / float(len(test_texts))))
    print('Results of {0}:'.format(
        'bayesian neural network' if args.nn_type == 'bayesian'
        else ('usual neural network' if args.nn_type == 'usual'
              else 'usual neural network with additional class')))
    print(classification_report(test_labels, y_pred, digits=4))
    if args.nn_type != 'additional_class':
        print('')
        print('Results of {0} without UNKNOWN class:'.format(
            'bayesian neural network' if args.nn_type == 'bayesian' else 'usual neural network'))
        y_pred = [nn.classes_reverse_index_[class_idx]
                  for class_idx in nn.predict_proba(test_data[0]).argmax(axis=1)]
        print(classification_report(test_data[1], y_pred, digits=4))
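
# The `bayesian`/`usual` branches above rely on the classifier returning a negative
# label for rejected (out-of-domain) inputs. One common way to implement such a
# rejection rule (an illustration, not necessarily this library's exact mechanism)
# is to threshold the class probabilities, e.g. averaged over Monte Carlo samples:
def predict_with_rejection(probabilities, threshold=0.5):
    # probabilities: (n_samples, n_classes) array of class probabilities.
    best = probabilities.argmax(axis=1)
    # Label a sample -1 ("unknown") when even its best class is not confident enough.
    best[probabilities.max(axis=1) < threshold] = -1
    return best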
# Demo: multi-label intent classification on DSTC-2, reported as per-class accuracy.
def main():
    parser = ArgumentParser()
    parser.add_argument('-m', '--model', dest='model_name', type=str, required=True,
                        help='The binary file with the text classifier.')
    parser.add_argument('-t', '--train', dest='train_file_name', type=str, required=True,
                        help='Path to the archive with DSTC-2 training data.')
    parser.add_argument('-e', '--test', dest='test_file_name', type=str, required=True,
                        help='Path to the archive with DSTC-2 data for final testing.')
    parser.add_argument('--conv1', dest='size_of_conv1', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 1.')
    parser.add_argument('--conv2', dest='size_of_conv2', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 2.')
    parser.add_argument('--conv3', dest='size_of_conv3', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 3.')
    parser.add_argument('--conv4', dest='size_of_conv4', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 4.')
    parser.add_argument('--conv5', dest='size_of_conv5', type=int, required=False, default=20,
                        help='Size of the Bayesian convolution layer with kernel size 5.')
    parser.add_argument('--hidden', dest='hidden_layer_size', type=str, required=False, default='500',
                        help='Size of each hidden layer and total number of hidden layers '
                             '(separate them with a colon).')
    parser.add_argument('--num_monte_carlo', dest='num_monte_carlo', type=int, required=False, default=10,
                        help='Number of generated Monte Carlo samples for each data sample.')
    parser.add_argument('--batch_size', dest='batch_size', type=int, required=False, default=16,
                        help='Size of mini-batch.')
    parser.add_argument('--gpu_frac', dest='gpu_memory_frac', type=float, required=False, default=0.9,
                        help='Fraction of GPU memory allocated to the classifier.')
    parser.add_argument('--nn_type', dest='nn_type', type=str, choices=['bayesian', 'usual'],
                        required=False, default='bayesian',
                        help='Neural network type: `bayesian` or `usual`.')
    args = parser.parse_args()

    model_name = os.path.normpath(args.model_name)
    train_file_name = os.path.normpath(args.train_file_name)
    test_file_name = os.path.normpath(args.test_file_name)
    hidden_layer_size, n_hidden_layers = parse_hidden_layers_description(args.hidden_layer_size)
    if os.path.isfile(model_name):
        with open(model_name, 'rb') as fp:
            nn, train_classes = pickle.load(fp)
        print('Classes list: {0}'.format(train_classes))
        print('')
    else:
        train_texts, train_labels, train_classes = read_dstc2_data(train_file_name)
        print('Classes list: {0}'.format(train_classes))
        print('Number of samples for training is {0}.'.format(len(train_texts)))
        nn = ImpartialTextClassifier(
            filters_for_conv1=args.size_of_conv1, filters_for_conv2=args.size_of_conv2,
            filters_for_conv3=args.size_of_conv3, filters_for_conv4=args.size_of_conv4,
            filters_for_conv5=args.size_of_conv5, hidden_layer_size=hidden_layer_size,
            n_hidden_layers=n_hidden_layers, batch_size=args.batch_size,
            num_monte_carlo=args.num_monte_carlo, gpu_memory_frac=args.gpu_memory_frac,
            verbose=True, multioutput=True, random_seed=42, validation_fraction=0.15,
            max_epochs=100, patience=5, bayesian=(args.nn_type == 'bayesian')
        )
        nn.fit(train_texts, train_labels)
        print('')
        with open(model_name, 'wb') as fp:
            pickle.dump((nn, train_classes), fp)
    test_texts, test_labels, test_classes = read_dstc2_data(test_file_name, train_classes)
    assert test_classes == train_classes, \
        'Classes in the test set do not correspond to classes in the train set! {0}'.format(test_classes)
    print('')
    print('Number of samples for final testing is {0}.'.format(len(test_texts)))
    y_pred = nn.predict(test_texts)
    accuracy_by_classes = dict()
    for class_idx in range(nn.n_classes_):
        n_total = 0
        n_correct = 0
        for sample_idx in range(len(test_texts)):
            # Binarize the (possibly multi-label) true and predicted labels for this class.
            if isinstance(test_labels[sample_idx], set):
                y_true_ = 1 if class_idx in test_labels[sample_idx] else 0
            else:
                y_true_ = 1 if class_idx == test_labels[sample_idx] else 0
            if isinstance(y_pred[sample_idx], set):
                y_pred_ = 1 if class_idx in y_pred[sample_idx] else 0
            else:
                y_pred_ = 1 if class_idx == y_pred[sample_idx] else 0
            if y_true_ == y_pred_:
                n_correct += 1
            if y_true_ > 0:
                n_total += 1
        if n_total > 0:
            # Per-class accuracy over all test samples; classes absent from the test set are skipped.
            accuracy_by_classes[class_idx] = float(n_correct) / float(len(test_texts))
    total_accuracy = 0.0
    name_width = 0
    for class_idx in accuracy_by_classes.keys():
        total_accuracy += accuracy_by_classes[class_idx]
        if len(test_classes[class_idx]) > name_width:
            name_width = len(test_classes[class_idx])
    total_accuracy /= float(len(accuracy_by_classes))
    print('Total accuracy: {0:6.2%}'.format(total_accuracy))
    print('By classes:')
    for class_idx in sorted(list(accuracy_by_classes.keys())):
        print(' {0:<{1}} {2:6.2%}'.format(test_classes[class_idx], name_width,
                                          accuracy_by_classes[class_idx]))
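
# Example invocation of this DSTC-2 demo (the script and archive names below are
# hypothetical; substitute the real paths on your machine):
#
#     python demo_dstc2.py -m dstc2_classifier.pkl -t dstc2_traindev.tar.gz \
#         -e dstc2_test.tar.gz --nn_type bayesian --batch_size 16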