def main():
    """Evaluate a self-trained classifier for every setting in PARAMS.

    Loads the targets and their scores, the feature matrix and the
    passages from pickle files, then for each parameter setting trains a
    classifier with ``classify_scene.self_train_classifier``, labels all
    targets and prints one tab-separated line: the parameter values
    followed by the size of each element returned by
    ``tokeneval.evaluate_with_type``.
    """

    def _unpickle(path):
        # NOTE: pickle can execute arbitrary code while loading; these
        # paths are expected to point at trusted, locally produced files.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    # Gather the required data.
    target_array, scores = _unpickle(TARGETS_PATH)
    target_list = target_array.tolist()
    fmat = _unpickle(FMAT_PATH)
    passages = _unpickle(PASSAGES_PATH)[:NUM_PASSAGES]
    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    # Run through the parameter settings.
    # NOTE: sampling settings via params_generator(NUM_SAMPLING) was the
    # earlier variant of this loop; it is currently disabled.
    for index, params in enumerate(PARAMS):
        # Progress report goes to stderr so stdout carries results only.
        sys.stderr.write('{} {}\n'.format(METHOD, index))
        clas, _, _ = classify_scene.self_train_classifier(
            fmat, scores, target_array, params,
            method=METHOD, c_param=CLS_PRM, nu_param=CLS_PRM,
            learn_rate=CLS_PRM, n_estimators=500)

        # The first len(scores) targets get their label from thresholding
        # the given scores; the remaining rows of fmat are labelled by the
        # freshly trained classifier.
        scored_labels = [
            int(score >= classify_scene.PRE_LABELS_THRESH) for score in scores
        ]
        predicted_labels = list(
            classify_scene.predict_labels(clas, fmat[len(scores):]))
        target_labels = scored_labels + predicted_labels

        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, target_list, target_labels)

        columns = [str(value) for value in params]
        columns.extend(str(len(group)) for group in stats)
        print("\t".join(columns))
def evaluate_with_classifier(tokens, token_labels, targets, token_features,
                             classifier):
    """Evaluate per-token classifier predictions against gold labels.

    Labels are predicted for ``token_features`` with
    ``classify_scene.predict_labels``. Each token is lemmatized against
    ``targets``; only tokens whose lemma is in ``targets`` are scored.
    Tokens, labels and predictions are walked in lockstep, so iteration
    stops at the shortest of the three.

    Args:
        tokens: token strings to evaluate.
        token_labels: gold label for each token (0 or 1 expected).
        targets: collection of target lemmas.
        token_features: per-token features handed to the classifier.
        classifier: trained classifier used for prediction.

    Returns:
        A tuple ``(found, not_found, tp, tn, fp, fn)``. ``found`` and
        ``not_found`` hold ``(token, gold_label)`` pairs for tokens whose
        lemma is / is not in ``targets``; ``tp``, ``tn``, ``fp`` and ``fn``
        hold the tokens that are true positives, true negatives, false
        positives and false negatives respectively.
    """
    found, not_found = [], []
    tp, tn, fp, fn = [], [], [], []
    predictions = classify_scene.predict_labels(
        classifier, token_features).tolist()

    for token, gold, guess in zip(tokens, token_labels, predictions):
        if lemmatize(token, targets) not in targets:
            not_found.append((token, gold))
            continue

        found.append((token, gold))
        if guess == gold and gold == 0:
            bucket = tn
        elif guess == gold and gold == 1:
            bucket = tp
        elif gold == 0:
            # Gold says negative but the classifier disagreed.
            bucket = fp
        else:
            # Every remaining case is counted as a false negative.
            bucket = fn
        bucket.append(token)

    return found, not_found, tp, tn, fp, fn
def main():
    """Train one classifier and print the sizes of its evaluation groups.

    Loads the first NUM_PASSAGES passages from PASSAGES_PATH, trains a
    classifier on the first ``len(LABELS)`` rows of FMAT and evaluates it
    either per token (when TOKENS_FMAT is available) or per type. The
    length of every element of the evaluation result is printed on one
    tab-separated line.
    """
    # NOTE: pickle can execute arbitrary code while loading; the path is
    # expected to point at a trusted, locally produced file.
    with open(PASSAGES_PATH, "rb") as passages_file:
        all_passages = pickle.load(passages_file)
    passages = all_passages[:NUM_PASSAGES]

    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    clas = classify_scene.train_classifier(
        FMAT[:len(LABELS)], LABELS, METHOD,
        c_param=PARAM, nu_param=PARAM, learn_rate=PARAM, n_estimators=500)

    if TOKENS_FMAT is None:
        # Type-level evaluation: the known labels come first, followed by
        # predictions for the rows of FMAT that have no label.
        predicted = classify_scene.predict_labels(
            clas, FMAT[len(LABELS):]).tolist()
        target_labels = LABELS.tolist() + predicted
        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, TARGETS, target_labels)
    else:
        # Token-level evaluation using the per-token feature matrix.
        stats = tokeneval.evaluate_with_classifier(
            tokens, token_labels, TARGETS, TOKENS_FMAT, clas)

    print("\t".join(str(len(group)) for group in stats))