Beispiel #1
0
def main():
    """Train a classifier on the labeled features and print evaluation counts.

    Loads the pickled passages from PASSAGES_PATH, keeps the first
    NUM_PASSAGES of them and extracts their terminals and token labels.
    A classifier is trained on the first len(LABELS) entries of FMAT and is
    then evaluated per token (when TOKENS_FMAT is set) or per type
    (otherwise).  The size of every collection in the evaluation result is
    printed on one tab-separated line.
    """
    with open(PASSAGES_PATH, "rb") as passages_file:
        all_passages = pickle.load(passages_file)
    selected = all_passages[:NUM_PASSAGES]
    terminals, token_labels = tokeneval.get_terminals_labels(selected)
    tokens = [terminal.text for terminal in terminals]

    # The first len(LABELS) entries of FMAT are the ones that have labels.
    num_labeled = len(LABELS)
    classifier = classify.train_classifier(
        FMAT[:num_labeled], LABELS, METHOD,
        c_param=PARAM, nu_param=PARAM, learn_rate=PARAM,
        n_estimators=500)

    if TOKENS_FMAT is None:
        # Type evaluation: known labels followed by the predicted labels of
        # the remaining entries.
        type_labels = LABELS.tolist()
        type_labels += classify.predict_labels(
            classifier, FMAT[num_labeled:]).tolist()
        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, TARGETS, type_labels)
    else:
        # Token evaluation: the classifier is applied to the token features.
        stats = tokeneval.evaluate_with_classifier(
            tokens, token_labels, TARGETS, TOKENS_FMAT, classifier)

    sizes = [str(len(group)) for group in stats]
    print("\t".join(sizes))
Beispiel #2
0
def main():
    """Self-train one classifier per parameter setting and print token stats.

    Loads the targets/scores, the feature matrix, the passages and the token
    feature matrix from their pickle files, then, for every entry of PARAMS,
    self-trains a classifier and evaluates it on the tokens of the first
    NUM_PASSAGES passages.  For each setting one tab-separated line is
    printed: the parameter values followed by the size of every collection
    returned by the evaluation.  Progress (METHOD and the setting index) is
    written to stderr.
    """
    # NOTE: pickle.load can execute arbitrary code while loading; these paths
    # must only point at trusted files.
    with open(TARGETS_PATH, "rb") as f:
        target_array, scores = pickle.load(f)
    target_list = target_array.tolist()
    with open(FMAT_PATH, "rb") as f:
        fmat = pickle.load(f)
    with open(PASSAGES_PATH, "rb") as f:
        passages = pickle.load(f)
    with open(TOKENS_FMAT, "rb") as f:
        tokens_fmat = pickle.load(f)
    passages = passages[:NUM_PASSAGES]
    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [x.text for x in terminals]

    # Evaluate every parameter setting listed in PARAMS.
    for i, params in enumerate(PARAMS):
        sys.stderr.write('{} {}\n'.format(METHOD, i))
        clas, _, _ = classify.self_train_classifier(
            fmat, scores, target_array, params, method=METHOD,
            c_param=CLS_PRM, nu_param=CLS_PRM, learn_rate=CLS_PRM,
            n_estimators=500)
        # The evaluation is token based, so no per-type label list is built
        # here (it used to be computed on every iteration and never read).
        stats = tokeneval.evaluate_with_classifier(
            tokens, token_labels, target_list, tokens_fmat, clas)
        print("\t".join([str(x)
                         for x in params] + [str(len(x)) for x in stats]))
Beispiel #3
0
def evaluate_with_classifier(tokens, token_labels, targets, token_features, classifier):
    """Compare classifier predictions with the token labels of target tokens.

    Labels are predicted for token_features with the classifier, and tokens,
    token_labels and predictions are walked in parallel (stopping at the
    shortest of the three).  A token is only scored when its lemma, as
    returned by lemmatize(token, targets), is in targets.

    Returns a 6-tuple (found, not_found, tp, tn, fp, fn):
      found / not_found -- (token, token_label) pairs whose lemma is / is
                           not in targets.
      tp, tn, fp, fn    -- the found tokens split into true positives, true
                           negatives, false positives and false negatives.
                           Any found token not matched by the first three
                           cases ends up in fn.
    """
    found, not_found = [], []
    true_pos, true_neg, false_pos, false_neg = [], [], [], []
    predictions = classify.predict_labels(classifier, token_features).tolist()

    for word, gold, guess in zip(tokens, token_labels, predictions):
        entry = (word, gold)
        if lemmatize(word, targets) not in targets:
            not_found.append(entry)
            continue

        found.append(entry)
        # Pick the list this token belongs to, then append once.
        if guess == gold == 0:
            bucket = true_neg
        elif guess == gold == 1:
            bucket = true_pos
        elif gold == 0:
            bucket = false_pos
        else:
            bucket = false_neg
        bucket.append(word)

    return found, not_found, true_pos, true_neg, false_pos, false_neg
Beispiel #4
0
def main():
    """Train a classifier, evaluate it on the passages and print the counts.

    The passages are unpickled from PASSAGES_PATH and cut to the first
    NUM_PASSAGES.  The classifier is trained on the entries of FMAT that
    have a label in LABELS.  Evaluation is token based when TOKENS_FMAT is
    set and type based otherwise; the length of every collection in the
    result is printed, separated by tabs.
    """
    with open(PASSAGES_PATH, "rb") as handle:
        passages = pickle.load(handle)[:NUM_PASSAGES]
    terminals, token_labels = tokeneval.get_terminals_labels(passages)
    tokens = [terminal.text for terminal in terminals]

    # All hyper-parameters share the single PARAM value.
    train_options = dict(c_param=PARAM, nu_param=PARAM, learn_rate=PARAM,
                         n_estimators=500)
    labeled_features = FMAT[:len(LABELS)]
    clas = classify.train_classifier(labeled_features, LABELS, METHOD,
                                     **train_options)

    if TOKENS_FMAT is not None:  # use token evaluation, not type
        stats = tokeneval.evaluate_with_classifier(
            tokens, token_labels, TARGETS, TOKENS_FMAT, clas)
    else:
        # Known labels first, then predictions for the remaining entries.
        target_labels = LABELS.tolist()
        unlabeled_features = FMAT[len(LABELS):]
        predicted = classify.predict_labels(clas, unlabeled_features)
        target_labels.extend(predicted.tolist())
        stats = tokeneval.evaluate_with_type(
            tokens, token_labels, TARGETS, target_labels)

    print("\t".join(map(str, map(len, stats))))
Beispiel #5
0
def evaluate_with_classifier(tokens, token_labels, targets,
                             token_features, classifier):
    """Score the classifier's token predictions against the given labels.

    Predictions are obtained from classify.predict_labels for
    token_features and paired positionally with tokens and token_labels
    (pairing stops at the shortest sequence).  Only tokens whose lemma,
    from lemmatize(token, targets), appears in targets are scored.

    Returns the tuple (found, not_found, tp, tn, fp, fn).  found and
    not_found contain (token, token_label) pairs; tp, tn, fp and fn
    contain the found tokens that are true positives, true negatives,
    false positives and false negatives.  A found token that matches none
    of the first three cases is counted in fn.
    """
    # True/False positive/negative tokens
    tp = []
    tn = []
    fp = []
    fn = []
    found = []
    not_found = []

    guessed_labels = classify.predict_labels(
        classifier, token_features).tolist()
    triples = zip(tokens, token_labels, guessed_labels)
    for tok, expected, guessed in triples:
        pair = (tok, expected)
        if lemmatize(tok, targets) in targets:
            found.append(pair)
            agree = guessed == expected
            if agree and expected == 0:
                tn.append(tok)
            elif agree and expected == 1:
                tp.append(tok)
            elif expected == 0:
                fp.append(tok)
            else:
                fn.append(tok)
        else:
            not_found.append(pair)

    return found, not_found, tp, tn, fp, fn