def run(task_name, data_dir, pipeline_name, print_predictions, error_analysis, remove_stopwords):
    """Run a train/test experiment for *task_name* with the named pipeline.

    Loads the task data from *data_dir*, trains either a CNN or a classic
    sklearn-style pipeline (selected by *pipeline_name*), predicts on the
    test split and evaluates via ``utils.eval``.

    :param task_name: name of the task to load via ``task(...)``
    :param data_dir: directory the task loads its data from
    :param pipeline_name: pipeline identifier; ``cnn*`` selects the CNN path
    :param print_predictions: if truthy, log every test prediction
    :param error_analysis: if truthy, log the error-analysis report
    :param remove_stopwords: if truthy, switch to the ``*_stopwords``
        variant of the pipeline
    """
    # NOTE: lazy %-style logging args used throughout so messages are only
    # rendered when the INFO level is enabled; output is unchanged.
    logger.info('>> Running %s experiment', task_name)
    tsk = task(task_name)
    logger.info('>> Loading data...')
    tsk.load(data_dir)
    logger.info('>> retrieving train/data instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        tsk, split_train_dev=False)
    logger.info('>> Descriptive statistics dataset:')
    utils.descriptive_statistics(train_X, train_y, test_X, test_y)
    # Keep the raw test texts: the CNN path re-encodes test_X below, but the
    # prediction printout needs the original strings.
    test_X_ref = test_X
    if remove_stopwords:
        # For CNN pipelines drop any existing variant suffix first, then
        # select the stopword-filtered variant of the pipeline.
        if pipeline_name.startswith('cnn'):
            pipeline_name = pipeline_name.split('_')[0]
        pipeline_name = pipeline_name + '_stopwords'
    if pipeline_name.startswith('cnn'):
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(
            train_X, train_y, test_X, test_y)
        logger.info('>> testing CNN...')
    else:
        pipe = pipeline(pipeline_name)
        logger.info('>> training pipeline %s', pipeline_name)
        pipe.fit(train_X, train_y)
        if pipeline_name == 'naive_bayes_counts_lex':
            logger.info(" -- Found %s tokens in lexicon",
                        pipe.tokens_from_lexicon)
        logger.info('>> testing...')
    sys_y = pipe.predict(test_X)
    if print_predictions:
        logger.info('>> predictions1')
        utils.print_all_predictions(test_X_ref, test_y, sys_y, logger)
    if error_analysis:
        # Used for error evaluation
        logger.info(utils.print_error_analysis(test_X, test_y, sys_y))
    utils.eval(test_y, sys_y, pipeline_name, data_dir)
    if pipeline_name.startswith('naive_bayes'):
        utils.important_features_per_class(pipe.named_steps.frm,
                                           pipe.named_steps.clf, n=10)
def run(task_name, data_dir, pipeline_name, print_predictions):
    """Train the named pipeline on *task_name* data, then predict, evaluate
    and (optionally) log all test-set predictions.

    :param task_name: task identifier passed to ``task(...)``
    :param data_dir: directory the task loads its data from
    :param pipeline_name: pipeline identifier; ``cnn*`` selects the CNN path
    :param print_predictions: if truthy, log every test prediction
    """
    logger.info('>> Running {} experiment'.format(task_name))
    current_task = task(task_name)
    logger.info('>> Loading data...')
    current_task.load(data_dir)
    logger.info('>> retrieving train/data instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        current_task, split_train_dev=False)
    # Hold on to the raw test texts: the CNN branch replaces test_X with an
    # encoded version, but the prediction printout wants the originals.
    raw_test_X = test_X
    use_cnn = pipeline_name.startswith('cnn')
    if use_cnn:
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(
            train_X, train_y, test_X, test_y)
        logger.info('>> testing...')
    else:
        pipe = pipeline(pipeline_name)
        logger.info('>> training pipeline ' + pipeline_name)
        pipe.fit(train_X, train_y)
        if pipeline_name == 'naive_bayes_counts_lex':
            logger.info(" -- Found {} tokens in lexicon".format(
                pipe.tokens_from_lexicon))
        logger.info('>> testing...')
    sys_y = pipe.predict(test_X)
    logger.info('>> evaluation...')
    logger.info(utils.eval(test_y, sys_y))
    if print_predictions:
        logger.info('>> predictions')
        utils.print_all_predictions(raw_test_X, test_y, sys_y, logger)
def run(task_name, data_dir, pipeline_name):
    """Run one experiment: load *task_name*'s data from *data_dir*, train
    the pipeline named by *pipeline_name*, predict on the test split and
    log the evaluation result.
    """
    logger.info('>> Running {} experiment'.format(task_name))
    experiment = task(task_name)
    logger.info('>> Loading data...')
    experiment.load(data_dir)
    logger.info('>> retrieving train/test instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        experiment, split_train_dev=False)
    # CNN pipelines encode the data themselves and need no explicit fit here.
    if pipeline_name.startswith('cnn'):
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(
            train_X, train_y, test_X, test_y)
        logger.info('>> testing...')
    else:
        pipe = pipeline(pipeline_name)
        logger.info('>> training pipeline ' + pipeline_name)
        pipe.fit(train_X, train_y)
        logger.info('>> testing...')
    sys_y = pipe.predict(test_X)
    logger.info(utils.print_prediction(test_X, test_y, sys_y))
    logger.info('>> evaluation...')
    logger.info(utils.eval(test_y, sys_y))
def evaluate(model, test_X, test_y):
    """Predict labels for *test_X* with a trained Keras model and print metrics.

    Thresholds ``model.predict`` output at 0.5 to obtain integer class
    labels (assumes a binary classifier with a single probability output —
    TODO confirm; multi-class models would need argmax instead), then
    prints the result of ``utils.eval``.
    """
    # predict() returns probabilities; > 0.5 yields the binary class label.
    sys_y = (model.predict(test_X) > 0.5).astype('int32')
    print(utils.eval(test_y, sys_y))
def evaluate(model, test_X, test_y):
    """Predict labels for *test_X* with a trained Keras model and print metrics.

    ``Sequential.predict_classes`` was deprecated and removed from Keras
    (TensorFlow >= 2.6); this uses explicit thresholding of ``predict``
    output instead, matching the sibling ``evaluate`` in this file.
    Assumes a binary classifier with a single sigmoid output — TODO
    confirm; a multi-class softmax model would need argmax instead.
    """
    # Same batching as the old predict_classes call; > 0.5 maps the
    # predicted probabilities to integer class labels.
    sys_y = (model.predict(test_X, batch_size=128) > 0.5).astype('int32')
    print(utils.eval(test_y, sys_y))