# Standard-library and third-party imports used by the snippets below.
# The helpers (run_random_baseline, run_ngram_baseline, check_format, evaluate)
# and the module-level ROOT_DIR used by the later variants are assumed to be
# defined elsewhere in the repository.
import logging
import os
from os import listdir
from os.path import dirname, join

import numpy as np


# Variant 1: train on all but the last debate and evaluate the task 1 random
# and n-gram baselines on the single held-out debate.
def run_baselines():
    ROOT_DIR = dirname(dirname(__file__))
    gold_data_folder = join(ROOT_DIR, 'data/training/')

    all_debates = [
        join(gold_data_folder, debate_name)
        for debate_name in listdir(gold_data_folder)
    ]
    all_debates.sort()
    train_debates = all_debates[:-1]
    test_debate = all_debates[-1]

    random_baseline_fpath = join(ROOT_DIR, 'baselines/data/task1_random_baseline.tsv')
    run_random_baseline(test_debate, random_baseline_fpath)
    if check_format(random_baseline_fpath):
        thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
            test_debate, random_baseline_fpath)
        print("Random Baseline AVGP:", avg_precision)

    ngram_baseline_fpath = join(ROOT_DIR, 'baselines/data/task1_ngram_baseline.tsv')
    run_ngram_baseline(train_debates, test_debate, ngram_baseline_fpath)
    if check_format(ngram_baseline_fpath):
        thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
            test_debate, ngram_baseline_fpath)
        print("Ngram Baseline AVGP:", avg_precision)
# Variant 2: split the debates 80/20 into train/dev and evaluate the task 5
# baselines on each dev debate, reporting the mean average precision.
def run_baselines():
    gold_data_folder = join(ROOT_DIR, 'data/training/')
    all_debates = [join(gold_data_folder, debate_name)
                   for debate_name in listdir(gold_data_folder)]
    all_debates.sort()

    n_train = int(0.8 * len(all_debates))
    train_debates = all_debates[:n_train]
    dev_debates = all_debates[n_train:]

    run_random_baseline(dev_debates)
    avg_precisions = []
    for test_debate in dev_debates:
        random_baseline_fpath = join(
            ROOT_DIR,
            'baselines/data/task5_random_baseline_%s' % (os.path.basename(test_debate)))
        if check_format(random_baseline_fpath):
            thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
                test_debate, random_baseline_fpath)
            avg_precisions.append(avg_precision)
    print("Random Baseline AVGP:", np.mean(avg_precisions))

    run_ngram_baseline(train_debates, dev_debates)
    avg_precisions = []
    for test_debate in dev_debates:
        ngram_baseline_fpath = join(
            ROOT_DIR,
            'baselines/data/task5_ngram_baseline_%s' % (os.path.basename(test_debate)))
        if check_format(ngram_baseline_fpath):
            thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
                test_debate, ngram_baseline_fpath)
            avg_precisions.append(avg_precision)
    print("Ngram Baseline AVGP:", np.mean(avg_precisions))
def run_baselines():
    train_fpath = join(ROOT_DIR, 'data/training.tsv')
    test_fpath = join(ROOT_DIR, 'data/dev.tsv')

    run_random_baseline(test_fpath)
    random_baseline_fpath = join(
        ROOT_DIR,
        'baselines/data/task1_random_baseline_%s' % (os.path.basename(test_fpath)))
    if check_format(random_baseline_fpath):
        thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
            test_fpath, random_baseline_fpath)
        print("Random Baseline AVGP:", avg_precision)

    run_ngram_baseline(train_fpath, test_fpath)
    ngram_baseline_fpath = join(
        ROOT_DIR,
        'baselines/data/task1_ngram_baseline_%s' % (os.path.basename(test_fpath)))
    if check_format(ngram_baseline_fpath):
        thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
            test_fpath, ngram_baseline_fpath)
        print("Ngram Baseline AVGP:", avg_precision)
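# A minimal sketch of a script entry point for the run_baselines() variants
# above, assuming one of the definitions is in scope. The __main__ guard and
# the logging setup are illustrative additions, not part of the original code.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    run_baselines()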
def validate_files(pred_files, gold_files):
    if len(pred_files) != len(gold_files):
        logging.error(
            'Different number of gold files ({}) and pred files ({}) provided. Cannot score.'.format(
                len(gold_files), len(pred_files)))
        return False
    if len(pred_files) != len(set(pred_files)):
        logging.error('Same pred file provided multiple times. The pred files should be for different debates.')
        return False
    for pred_file in pred_files:
        if not check_format(pred_file):
            logging.error('Bad format for pred file {}. Cannot score.'.format(pred_file))
            return False
    return True
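# A usage sketch for the multi-file validate_files() above: score each
# (gold, pred) pair and report the mean average precision, mirroring the
# per-debate averaging done in the task 5 run_baselines() variant. The name
# score_task and the printed report are hypothetical, not from the source.
def score_task(gold_files, pred_files):
    if not validate_files(pred_files, gold_files):
        return None
    avg_precisions = []
    for gold_file, pred_file in zip(gold_files, pred_files):
        thresholds, precisions, avg_precision, reciprocal_rank, num_relevant = evaluate(
            gold_file, pred_file)
        avg_precisions.append(avg_precision)
    mean_avg_precision = np.mean(avg_precisions)
    print('Mean AVGP over {} debates: {}'.format(len(gold_files), mean_avg_precision))
    return mean_avg_precision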
def validate_files(pred_file, gold_file):
    if not check_format(pred_file):
        logging.error(
            'Bad format for pred file {}. Cannot score.'.format(pred_file))
        return False
    return True