Example #1
0
def main():
    """
    Normalize a model and evaluate it before and after normalization.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners
        3. Output Model file with correction factors

    evaluate() is run on the model as read and again after normalize();
    per the original notes this prints the matthews correlation
    coefficient for each label.

    Raises:
        IndexError: if fewer than three command line arguments are given.
    """
    # Read every argument before doing any work, so that a missing output
    # path fails immediately rather than after loading, evaluating and
    # normalizing the model.
    data_path = sys.argv[1]
    model_path = sys.argv[2]
    output_path = sys.argv[3]

    model = read_model(model_path)
    # Parenthesized single-argument print behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # Baseline evaluation of the model as read from disk.
    evaluate(model, page_tuples, page_labels, page_num_labels)

    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(output_path, model)

    # Evaluate again on the normalized model.
    evaluate(model, page_tuples, page_labels, page_num_labels)
Example #2
0
def main():
    """
    Report the positive score TP / (TP + FP) for each label.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners

    Prints the score returned by score_positive(), then the value that
    weighted_average() derives from it and the get_page_count() results.
    """
    model_path, data_path = sys.argv[2], sys.argv[1]

    model = read_model(model_path)
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)

    # Confusion counts; TVDBG_P / TVDBG_L are passed through to the scorers.
    true_pos, true_neg, false_pos, false_neg = score_errors(
        model, page_tuples, page_labels, page_num_labels,
        False, TVDBG_P, TVDBG_L)
    score = score_positive(
        true_pos, true_neg, false_pos, false_neg, TVDBG_P, TVDBG_L)
    print('score = {}'.format(score))

    page_total, page_count = get_page_count(page_labels)
    print('weighted average = {}'.format(
        weighted_average(page_total, page_count, score)))
Example #3
0
def main():
    """
    Report the matthews correlation coefficient for each label.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners

    Prints the value returned by score_matthews(), then the value that
    weighted_average() derives from it and the get_page_count() results.
    """
    model_path, data_path = sys.argv[2], sys.argv[1]

    model = read_model(model_path)
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)

    # Confusion counts returned by score_errors, fed to the matthews scorer.
    true_pos, true_neg, false_pos, false_neg = score_errors(
        model, page_tuples, page_labels, page_num_labels, False)
    matthews = score_matthews(true_pos, true_neg, false_pos, false_neg)
    print('score = {}'.format(matthews))

    page_total, page_count = get_page_count(page_labels)
    print('weighted average = {}'.format(
        weighted_average(page_total, page_count, matthews)))
Example #4
0
def main():
    """
    Report the positive score TP / (TP + FP) for each label.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners

    Prints the score returned by score_positive(), then the value that
    weighted_average() derives from it and the get_page_count() results.
    """
    model_path, data_path = sys.argv[2], sys.argv[1]

    model = read_model(model_path)
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)

    # Confusion counts returned by score_errors, fed to the positive scorer.
    true_pos, true_neg, false_pos, false_neg = score_errors(
        model, page_tuples, page_labels, page_num_labels, False)
    score = score_positive(true_pos, true_neg, false_pos, false_neg)
    print('score = {}'.format(score))

    page_total, page_count = get_page_count(page_labels)
    print('weighted average = {}'.format(
        weighted_average(page_total, page_count, score)))
Example #5
0
def main():
    """
    Normalize a model and evaluate it before and after normalization.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners
        3. Output Model file with correction factors

    evaluate() is run on the model as read and again after normalize();
    per the original notes this prints the matthews correlation
    coefficient for each label.

    Raises:
        IndexError: if fewer than three command line arguments are given.
    """
    # Pull all three paths out of argv first: a missing output path should
    # fail here, not after the model has been loaded, scored and normalized.
    data_path, model_path, output_path = (
        sys.argv[1], sys.argv[2], sys.argv[3])

    model = read_model(model_path)
    # Single-argument print in parentheses gives identical output under
    # the Python 2 print statement and the Python 3 print function.
    summary = 'model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners']))
    print(summary)

    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # First pass: evaluate the model exactly as it was read.
    evaluate(model, page_tuples, page_labels, page_num_labels)

    # Normalize, persist, then evaluate the normalized model.
    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(output_path, model)
    evaluate(model, page_tuples, page_labels, page_num_labels)
Example #6
0
def main():
    """
    Report the matthews correlation coefficient for each label.

    Command Line Inputs:
        1. Data file with lines: PAGE, URL, LABEL_1 ... LABEL_n
           (tab-separated) and associated text for page
        2. Input Model file with labels and array of weak learners

    Prints the value returned by score_matthews(), then the value that
    weighted_average() derives from it and the get_page_count() results.
    """
    model_path, data_path = sys.argv[2], sys.argv[1]

    model = read_model(model_path)
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)

    # score_errors is called with True here, and TVDBG_P / TVDBG_L are
    # passed through to both scorers.
    true_pos, true_neg, false_pos, false_neg = score_errors(
        model, page_tuples, page_labels, page_num_labels,
        True, TVDBG_P, TVDBG_L)
    matthews = score_matthews(
        true_pos, true_neg, false_pos, false_neg, TVDBG_P, TVDBG_L)
    print('score = {}'.format(matthews))

    page_total, page_count = get_page_count(page_labels)
    print('weighted average = {}'.format(
        weighted_average(page_total, page_count, matthews)))