def main():
    """Normalize a model against labelled pages, scoring it before and after.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.
        sys.argv[3]: output model file, written with correction factors.

    evaluate() is run on the model as read and again on the model returned
    by normalize(); it is expected to print the matthews correlation
    coefficient for each label.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    label_total = len(model['labels'])
    learner_total = len(model['learners'])
    print('model has {} labels and {} tuples'.format(label_total,
                                                     learner_total))

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    print('read {} pages'.format(len(labels)))

    # Baseline: score the model exactly as it was loaded.
    evaluate(model, tuples, labels, label_counts)

    # Normalize, persist the result, then score the normalized model.
    normalized = normalize(model, tuples, labels, label_counts)
    write_model(sys.argv[3], normalized)
    evaluate(normalized, tuples, labels, label_counts)
def main():
    """Print the positive score for a model, plus its weighted average.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.

    Output: the positive score TP / (TP + FP) for each label, followed by
    the value weighted_average() derives from it and the page counts.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    # print 'model has {} labels and {} tuples'.format(len(model['labels']), len(model['learners']))

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    # print 'read {} pages'.format(len(labels))

    # The trailing False, TVDBG_P, TVDBG_L arguments are forwarded untouched
    # to the scoring helpers, which are defined elsewhere in the module.
    tp, tn, fp, fn = score_errors(
        model, tuples, labels, label_counts, False, TVDBG_P, TVDBG_L)
    positive = score_positive(tp, tn, fp, fn, TVDBG_P, TVDBG_L)
    print('score = {}'.format(positive))

    total_pages, pages_per_label = get_page_count(labels)
    averaged = weighted_average(total_pages, pages_per_label, positive)
    print('weighted average = {}'.format(averaged))
def main():
    """Print the matthews correlation score for a model and its weighted average.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.

    Output: the matthews correlation coefficient for each label, followed
    by the value weighted_average() derives from it and the page counts.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    # print 'model has {} labels and {} tuples'.format(len(model['labels']), len(model['learners']))

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    # print 'read {} pages'.format(len(labels))

    # Gather the four error tallies, then turn them into the matthews score.
    tp, tn, fp, fn = score_errors(model, tuples, labels, label_counts, False)
    coefficient = score_matthews(tp, tn, fp, fn)
    print('score = {}'.format(coefficient))

    total_pages, pages_per_label = get_page_count(labels)
    averaged = weighted_average(total_pages, pages_per_label, coefficient)
    print('weighted average = {}'.format(averaged))
def main():
    """Print the positive score for a model, plus its weighted average.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.

    Output: the positive score TP / (TP + FP) for each label, followed by
    the value weighted_average() derives from it and the page counts.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    # print 'model has {} labels and {} tuples'.format(len(model['labels']), len(model['learners']))

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    # print 'read {} pages'.format(len(labels))

    # Gather the four error tallies, then turn them into the positive score.
    tp, tn, fp, fn = score_errors(model, tuples, labels, label_counts, False)
    positive = score_positive(tp, tn, fp, fn)
    print('score = {}'.format(positive))

    total_pages, pages_per_label = get_page_count(labels)
    averaged = weighted_average(total_pages, pages_per_label, positive)
    print('weighted average = {}'.format(averaged))
def main():
    """Normalize a model against labelled pages, scoring it before and after.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.
        sys.argv[3]: output model file, written with correction factors.

    evaluate() is run on the model as read and again on the model returned
    by normalize(); it is expected to print the matthews correlation
    coefficient for each label.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    summary = 'model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners']))
    print(summary)

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    print('read {} pages'.format(len(labels)))

    # First pass: score the model as loaded.
    evaluate(model, tuples, labels, label_counts)

    # Second pass: normalize, save, and score the normalized model.
    normalized = normalize(model, tuples, labels, label_counts)
    write_model(sys.argv[3], normalized)
    evaluate(normalized, tuples, labels, label_counts)
def main():
    """Print the matthews correlation score for a model and its weighted average.

    Command-line arguments:
        sys.argv[1]: data file with lines
            PAGE URL LABEL_1 LABEL_2 ... LABEL_n
            and the associated text for each page.
        sys.argv[2]: input model file with labels and an array of weak
            learners.

    Output: the matthews correlation coefficient for each label, followed
    by the value weighted_average() derives from it and the page counts.
    """
    model_path = sys.argv[2]
    model = read_model(model_path)
    # print 'model has {} labels and {} tuples'.format(len(model['labels']), len(model['learners']))

    data_path = sys.argv[1]
    tuples, labels, label_counts = read_data_labels(data_path, model)
    # print 'read {} pages'.format(len(labels))

    # The trailing True, TVDBG_P, TVDBG_L arguments are forwarded untouched
    # to the scoring helpers, which are defined elsewhere in the module.
    tp, tn, fp, fn = score_errors(
        model, tuples, labels, label_counts, True, TVDBG_P, TVDBG_L)
    coefficient = score_matthews(tp, tn, fp, fn, TVDBG_P, TVDBG_L)
    print('score = {}'.format(coefficient))

    total_pages, pages_per_label = get_page_count(labels)
    averaged = weighted_average(total_pages, pages_per_label, coefficient)
    print('weighted average = {}'.format(averaged))