Example #1
0
def main():
    """
    Load a data file and a model, run naivebayes() on the model, and save it.

    Command Line Inputs:
    Input: Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n and associated text for page
    Input: Model file with labels and array of weak learners
    Output: New Model file with labels and array of weak learners with naive bayesian weights

    Prints the number of labels and learners found in the loaded model.

    Raises:
        SystemExit: with a usage message when fewer than three command line
            arguments are supplied.
    """
    # Validate the argument count before any work is done; otherwise a missing
    # output path only surfaces as an IndexError after the whole computation.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_FILE OUTPUT_MODEL_FILE'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_path, output_path = sys.argv[1:4]

    # The first returned value (pages) is not needed here.
    (_pages, labels, label_text, tuples, tuples_selected,
     tuple_text) = read_data_tuples(data_path)
    model = read_model(model_path)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))
    # naivebayes() return value is unused, so it is expected to update
    # `model` in place -- confirm against its definition.
    naivebayes(model, labels, tuples, tuples_selected, label_text, tuple_text)
    write_model(output_path, model)
Example #2
0
def main():
    """
    Load a data file and a model, run cntnorm() on the model, and save it.

    Command Line Inputs:
    Input: Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n and associated text for page
    Input: Model file with labels and array of weak learners
    Output: New Model file with labels and array of weak learners with normalized weights

    Prints the number of labels and learners found in the loaded model.

    Raises:
        SystemExit: with a usage message when fewer than three command line
            arguments are supplied.
    """
    # Validate the argument count before any work is done; otherwise a missing
    # output path only surfaces as an IndexError after the whole computation.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_FILE OUTPUT_MODEL_FILE'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_path, output_path = sys.argv[1:4]

    # pages and tuples_selected are returned by the reader but unused here.
    (_pages, labels, label_text, tuples, _tuples_selected,
     tuple_text) = read_data_tuples(data_path)
    model = read_model(model_path)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))
    # cntnorm() return value is unused, so it is expected to update `model`
    # in place -- confirm against its definition.
    cntnorm(model, labels, tuples, label_text, tuple_text)
    write_model(output_path, model)
Example #3
0
def main():
    """
    Evaluate a model, normalize it, save it, and evaluate it again.

    Command Line Inputs:
    Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n and associated text for page
    Input Model file with labels and array of weak learners
    Output Model file with correction factors
    Print the matthews correlation coefficient for each label
    (presumably done inside evaluate() -- confirm against its definition)

    Also prints the model's label/learner counts and the number of pages read.

    Raises:
        SystemExit: with a usage message when fewer than three command line
            arguments are supplied.
    """
    # Validate the argument count before any work is done; otherwise a missing
    # output path only surfaces as an IndexError after evaluation and
    # normalization have already run.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_FILE OUTPUT_MODEL_FILE'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_path, output_path = sys.argv[1:4]

    # The model is loaded first because reading the data requires it.
    model = read_model(model_path)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # Evaluate before and after normalization so the two runs can be compared.
    evaluate(model, page_tuples, page_labels, page_num_labels)
    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(output_path, model)
    evaluate(model, page_tuples, page_labels, page_num_labels)
Example #4
0
def main():
    """
    Read page data and a model, apply cntnorm() to the model, then write it out.

    Command Line Inputs:
    Input: Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n and associated text for page
    Input: Model file with labels and array of weak learners
    Output: New Model file with labels and array of weak learners with normalized weights

    Prints the number of labels and learners found in the loaded model.

    Raises:
        SystemExit: with a usage message when fewer than three command line
            arguments are supplied.
    """
    # Fail fast on a short command line: the output path is only needed at
    # the very end, so without this check a missing argument would be
    # reported as an IndexError after all the work has been done.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_FILE OUTPUT_MODEL_FILE'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_path, output_path = sys.argv[1:4]

    # pages and tuples_selected are returned by the reader but unused here.
    (_pages, labels, label_text, tuples, _tuples_selected,
     tuple_text) = read_data_tuples(data_path)
    model = read_model(model_path)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))
    # cntnorm() return value is unused, so it is expected to update `model`
    # in place -- confirm against its definition.
    cntnorm(model, labels, tuples, label_text, tuple_text)
    write_model(output_path, model)
Example #5
0
def main():
    """
    Score a model, normalize it, write the result, and score it once more.

    Command Line Inputs:
    Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n and associated text for page
    Input Model file with labels and array of weak learners
    Output Model file with correction factors
    Print the matthews correlation coefficient for each label
    (presumably done inside evaluate() -- confirm against its definition)

    Also prints the model's label/learner counts and the number of pages read.

    Raises:
        SystemExit: with a usage message when fewer than three command line
            arguments are supplied.
    """
    # Fail fast on a short command line: the output path is only needed after
    # evaluation and normalization, so without this check a missing argument
    # would be reported as an IndexError once that work is already done.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_FILE OUTPUT_MODEL_FILE'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_path, output_path = sys.argv[1:4]

    # The model is loaded first because reading the data requires it.
    model = read_model(model_path)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # Evaluate before and after normalization so the two runs can be compared.
    evaluate(model, page_tuples, page_labels, page_num_labels)
    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(output_path, model)
    evaluate(model, page_tuples, page_labels, page_num_labels)