def main():
    """Re-weight a model's weak learners with naive Bayesian weights.

    Command line inputs:
        argv[1]: Input data file with lines
            ``PAGE URL LABEL_1 LABEL_2 ... LABEL_n`` and the associated
            text for each page.
        argv[2]: Input model file with labels and an array of weak
            learners.
        argv[3]: Output path for the new model file with labels and an
            array of weak learners with naive Bayesian weights.

    Raises:
        SystemExit: with a usage message when fewer than three command
            line arguments are supplied.
    """
    # Validate every path up front: the output path is only needed by the
    # final write, so a missing argument would otherwise surface as an
    # IndexError after the data had been read and naivebayes had run.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_IN MODEL_OUT'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_in_path, model_out_path = sys.argv[1:4]

    # The first value returned by the reader is not used by this entry point.
    (_pages, labels, label_text, tuples,
     tuples_selected, tuple_text) = read_data_tuples(data_path)
    model = read_model(model_in_path)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    # The return value is ignored and the same model object is written out
    # (presumably naivebayes updates it in place -- confirm).
    naivebayes(model, labels, tuples, tuples_selected, label_text, tuple_text)
    write_model(model_out_path, model)
def main():
    """Rewrite a model's weak learners with normalized weights.

    Command line inputs:
        argv[1]: Input data file with lines
            ``PAGE URL LABEL_1 LABEL_2 ... LABEL_n`` and the associated
            text for each page.
        argv[2]: Input model file with labels and an array of weak
            learners.
        argv[3]: Output path for the new model file with labels and an
            array of weak learners with normalized weights.

    Raises:
        SystemExit: with a usage message when fewer than three command
            line arguments are supplied.
    """
    # Validate every path up front: the output path is only needed by the
    # final write, so a missing argument would otherwise surface as an
    # IndexError after the data had been read and cntnorm had run.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_IN MODEL_OUT'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_in_path, model_out_path = sys.argv[1:4]

    # Underscore-prefixed values are returned by the reader but are not
    # used by this entry point.
    (_pages, labels, label_text, tuples,
     _tuples_selected, tuple_text) = read_data_tuples(data_path)
    model = read_model(model_in_path)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    # Debug aid (disabled): dump the row of values stored for each label.
    # for l in range(len(labels)):
    #     print '[{}]={}: [{}]'.format(l, label_text[l], \
    #         ' '.join(str(labels[l][p]) for p in range(len(labels[0]))))

    # The return value is ignored and the same model object is written out
    # (presumably cntnorm updates it in place -- confirm).
    cntnorm(model, labels, tuples, label_text, tuple_text)
    write_model(model_out_path, model)
def main():
    """Evaluate a model, normalize it, save it, and evaluate it again.

    Command line inputs:
        argv[1]: Data file with lines
            ``PAGE URL LABEL_1 LABEL_2 ... LABEL_n`` and the associated
            text for each page.
        argv[2]: Input model file with labels and an array of weak
            learners.
        argv[3]: Output path for the model file with correction factors.

    The Matthews correlation coefficient is printed for each label
    (presumably by ``evaluate`` -- confirm), once before and once after
    normalization.

    Raises:
        SystemExit: with a usage message when fewer than three command
            line arguments are supplied.
    """
    # Validate every path up front: the output path is only needed after
    # loading, evaluating and normalizing, so a missing argument would
    # otherwise surface as an IndexError once all of that had already run.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_IN MODEL_OUT'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_in_path, model_out_path = sys.argv[1:4]

    # The model is loaded first because the data reader takes it as input.
    model = read_model(model_in_path)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # Evaluate the model as loaded.
    evaluate(model, page_tuples, page_labels, page_num_labels)

    # Normalize, persist the result, then evaluate the normalized model.
    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(model_out_path, model)
    evaluate(model, page_tuples, page_labels, page_num_labels)
def main():
    """Rewrite a model's weak learners with normalized weights.

    Command line inputs:
        argv[1]: Input data file with lines
            ``PAGE URL LABEL_1 LABEL_2 ... LABEL_n`` and the associated
            text for each page.
        argv[2]: Input model file with labels and an array of weak
            learners.
        argv[3]: Output path for the new model file with labels and an
            array of weak learners with normalized weights.

    Raises:
        SystemExit: with a usage message when fewer than three command
            line arguments are supplied.
    """
    # Fail fast on a short command line: without this check the missing
    # output path would only be noticed by the final write, as an
    # IndexError raised after reading the data and running cntnorm.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_IN MODEL_OUT'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_in_path, model_out_path = sys.argv[1:4]

    # Underscore-prefixed values are returned by the reader but are not
    # used by this entry point.
    (_pages, labels, label_text, tuples,
     _tuples_selected, tuple_text) = read_data_tuples(data_path)
    model = read_model(model_in_path)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    # Debug aid (disabled): dump the row of values stored for each label.
    # for l in range(len(labels)):
    #     print '[{}]={}: [{}]'.format(l, label_text[l], \
    #         ' '.join(str(labels[l][p]) for p in range(len(labels[0]))))

    # The return value is ignored and the same model object is written out
    # (presumably cntnorm updates it in place -- confirm).
    cntnorm(model, labels, tuples, label_text, tuple_text)
    write_model(model_out_path, model)
def main():
    """Evaluate a model, normalize it, save it, and evaluate it again.

    Command line inputs:
        argv[1]: Data file with lines
            ``PAGE URL LABEL_1 LABEL_2 ... LABEL_n`` and the associated
            text for each page.
        argv[2]: Input model file with labels and an array of weak
            learners.
        argv[3]: Output path for the model file with correction factors.

    The Matthews correlation coefficient is printed for each label
    (presumably by ``evaluate`` -- confirm), once before and once after
    normalization.

    Raises:
        SystemExit: with a usage message when fewer than three command
            line arguments are supplied.
    """
    # Fail fast on a short command line: the output path is not touched
    # until after loading, evaluating and normalizing, so without this
    # check a missing argument would raise IndexError only at that point.
    if len(sys.argv) < 4:
        sys.exit('usage: {} DATA_FILE MODEL_IN MODEL_OUT'.format(
            sys.argv[0] if sys.argv else 'main'))
    data_path, model_in_path, model_out_path = sys.argv[1:4]

    # The model is loaded first because the data reader takes it as input.
    model = read_model(model_in_path)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('model has {} labels and {} tuples'.format(
        len(model['labels']), len(model['learners'])))

    page_tuples, page_labels, page_num_labels = read_data_labels(
        data_path, model)
    print('read {} pages'.format(len(page_labels)))

    # Evaluate the model as loaded.
    evaluate(model, page_tuples, page_labels, page_num_labels)

    # Normalize, persist the result, then evaluate the normalized model.
    model = normalize(model, page_tuples, page_labels, page_num_labels)
    write_model(model_out_path, model)
    evaluate(model, page_tuples, page_labels, page_num_labels)