Example #1
    def test_learner_on_data():
        import GwData
        import WordTokenizer
        import numpy as np

        MINIMUM_COVERAGE_PCT = 2.0
        code = "53"
        print "Learning rules for code: " + code
        # '%%' prints a literal '%' in a format string
        print "Minimum coverage: %.1f%%\n" % MINIMUM_COVERAGE_PCT

        data = GwData.GwData()
        xs = WordTokenizer.tokenize(data.documents,
                                    stem=False,
                                    spelling_correct=False,
                                    remove_stop_words=False,
                                    min_word_count=1)

        ys = data.labels_for(code)

        def rule_score_fn(act_ys, predicted):
            r, p, f1 = rpf1(act_ys, predicted)
            return r * (p**0.5)

        shuffled_ixs = np.array(range(len(xs)))
        np.random.shuffle(shuffled_ixs)

        shuffled_xs = np.array(xs)[shuffled_ixs]
        shuffled_ys = np.array(ys)[shuffled_ixs]

        td_size = int(len(xs) * 0.9)

        td_xs, td_ys = shuffled_xs[0:td_size], shuffled_ys[0:td_size]
        vd_xs, vd_ys = shuffled_xs[td_size:], shuffled_ys[td_size:]

        assert len(td_xs) + len(vd_xs) == len(xs), "|TD| + |VD| == |D|"

        learner = RegExLearner(precision, f1_score, MINIMUM_COVERAGE_PCT)
        learner.fit(td_xs, td_ys)

        print_positives(xs, ys)
        print str(learner)

        # TD Performance
        td_pred = learner.predict(td_xs)
        r, p, f1 = rpf1(td_ys, td_pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(
            r, p, f1)

        # VD performance
        vd_pred = learner.predict(vd_xs)
        r, p, f1 = rpf1(vd_ys, vd_pred)
        print "VD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(
            r, p, f1)

Example #2
    def test_learner_on_data():
        import GwData
        import WordTokenizer
        import numpy as np

        MINIMUM_COVERAGE_PCT = 2.0
        code = "53"
        print "Learning rules for code: " + code
        # '%%' prints a literal '%' in a format string
        print "Minimum coverage: %.1f%%\n" % MINIMUM_COVERAGE_PCT

        data = GwData.GwData()
        xs = WordTokenizer.tokenize(data.documents, stem=False, spelling_correct=False, remove_stop_words=False, min_word_count=1)

        ys = data.labels_for(code)

        def rule_score_fn(act_ys, predicted):
            r, p, f1 = rpf1(act_ys, predicted)
            return r * (p ** 0.5)

        shuffled_ixs = np.array(range(len(xs)))
        np.random.shuffle(shuffled_ixs)

        shuffled_xs = np.array(xs)[shuffled_ixs]
        shuffled_ys = np.array(ys)[shuffled_ixs]

        td_size = int(len(xs) * 0.9)

        td_xs, td_ys = shuffled_xs[0:td_size], shuffled_ys[0:td_size]
        vd_xs, vd_ys = shuffled_xs[td_size:], shuffled_ys[td_size:]

        assert len(td_xs) + len(vd_xs) == len(xs), "|TD| + |VD| == |D|"

        learner = RegExLearner(precision, f1_score, MINIMUM_COVERAGE_PCT)
        learner.fit(td_xs, td_ys)

        print_positives(xs, ys)
        print str(learner)

        # TD Performance
        td_pred = learner.predict(td_xs)
        r, p, f1 = rpf1(td_ys, td_pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(r, p, f1)

        # VD performance
        vd_pred = learner.predict(vd_xs)
        r, p, f1 = rpf1(vd_ys, vd_pred)
        print "VD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(r, p, f1)

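The rule_score_fn in Examples #1 and #2 ranks candidate rules by recall weighted by the square root of precision, so recall dominates and precision acts as a damping factor. A minimal standalone sketch of that trade-off on toy values (rule_score below is an illustrative rewrite of the same formula, not project code):

    def rule_score(recall, precision):
        # r * sqrt(p): recall dominates, precision damps the score
        return recall * (precision ** 0.5)

    # a high-recall, mid-precision rule outscores a low-recall, perfect-precision one
    print(rule_score(0.9, 0.5))  # ~0.636
    print(rule_score(0.4, 1.0))  # 0.4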
Example #3
    def test_learner_on_data():
        import GwData
        import WordTokenizer

        code = "50"

        data = GwData.GwData()
        xs = WordTokenizer.tokenize(data.documents, spelling_correct=False)
        ys = data.labels_for(code)

        def rule_score_fn(act_ys, predicted):
            return precision(act_ys, predicted) * (recall(act_ys, predicted) ** 0.5)

        learner = RegExLearner(precision, f1_score, 2.5)
        learner.fit(xs, ys)
        pred = learner.predict(xs)

        # TD Performance
        print_positives(xs, ys)
        r, p, f1 = rpf1(ys, pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(
            r, p, f1)
        print str(learner)
Example #4
def test(epochs=1):
    results = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_split=0.0, show_accuracy=True, verbose=1)
    #valid_probs = model.predict_proba(X_valid, batch_size=batch_size)
    test_probs  = model.predict_proba(X_test,  batch_size=batch_size)
    valid_f1s = []
    test_f1s = []
    test_f1s_50 = []
    cutoff = 0
    for ix, tag in ix2tag.items():
        #valid_tag_predictions = valid_probs[:, ix]
        test_tag_predictions  = test_probs[:, ix]

        #valid_tag_ys = y_valid[:, ix]
        test_tag_ys  =  y_test[:, ix]

        #r_v, p_v, f1_v, cutoff = find_cutoff(valid_tag_ys, valid_tag_predictions)
        #valid_f1s.append(f1_v)

        #test_classes =      [1 if p >= cutoff else 0 for p in test_tag_predictions]
        test_classes_5050 = [1 if p >= 0.5 else 0 for p in test_tag_predictions]

        #r, p, f1 = rpf1(test_tag_ys, test_classes)
        r50, p50, f150 = rpf1(test_tag_ys, test_classes_5050)
        #print("VALIDATION:", tag.ljust(35), str(sum(valid_tag_ys)).ljust(3), "recall", rnd(r_v), "precision", rnd(p_v), "f1", rnd(f1_v), "cutoff", rnd(cutoff))
        #print("TEST      :", tag.ljust(35), str(sum(test_tag_ys)).ljust(3),  "recall", rnd(r),   "precision", rnd(p),   "f1", rnd(f1),   "cutoff", rnd(cutoff))
        print("TEST 50/50:", tag.ljust(35), str(sum(test_tag_ys)).ljust(3),  "recall", rnd(r50),   "precision", rnd(p50),   "f1", rnd(f150),   "cutoff", rnd(cutoff))

        #test_f1s.append(f1)
        test_f1s_50.append(f150)

    #print("MEAN VALID F1       : " + str(np.mean(valid_f1s)))
    #print("MEAN TEST  F1       : " + str(np.mean(test_f1s)))
    print("MEAN TEST  F1 50/50 : " + str(np.mean(test_f1s_50)))
    # test_f1s is never populated while the cutoff search is commented out
    return np.mean(test_f1s_50)
Example #5
def test(epochs=1):
    ixs = range(len(X_train))
    random.shuffle(ixs)
    x_shf = X_train[ixs]
    y_shf = y_train[ixs]

    concat_X_train = []
    for i in range(len(ngram_filters)):
        concat_X_train.append(x_shf)

    results = model.fit(concat_X_train, y_shf, batch_size=batch_size, nb_epoch=epochs, validation_split=0.0, show_accuracy=True, verbose=1)
    predictions = model.predict_proba(concat_X_test)
    print("Xp shape:", predictions.shape)
    f1s = []
    for ix, tag in ix2tag.items():
        tag_predictions = predictions[:, ix]
        tag_predictions = [1 if p >= 0.5 else 0 for p in tag_predictions]
        tag_ys = y_test[:, ix]
        r, p, f1 = rpf1(tag_ys, tag_predictions)
        count = sum(tag_ys)
        print(tag.ljust(10), str(count).rjust(4), "recall", rnd(r), "precision", rnd(p), "f1", rnd(f1))
        f1s.append(f1)
    mean_f1 = np.mean(f1s)
    print("MEAN F1: " + str(mean_f1))
    return mean_f1
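Examples #5 and #6 append the same shuffled input once per entry in ngram_filters, which suggests a multi-input model with one branch per n-gram filter width. If that reading is right, the append loop reduces to a one-liner that, like the loop, repeats references to the same array rather than copying it:

    # one copy of the shuffled input per n-gram branch
    concat_X_train = [x_shf] * len(ngram_filters)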
Example #6
def test(epochs=1):
    ixs = range(len(X_train))
    random.shuffle(ixs)
    x_shf = X_train[ixs]
    y_shf = y_train[ixs]

    concat_X_train = []
    for i in range(len(ngram_filters)):
        concat_X_train.append(x_shf)

    results = model.fit(concat_X_train,
                        y_shf,
                        batch_size=batch_size,
                        nb_epoch=epochs,
                        validation_split=0.0,
                        show_accuracy=True,
                        verbose=1)
    predictions = model.predict_proba(concat_X_test)
    print("Xp shape:", predictions.shape)
    f1s = []
    for ix, tag in ix2tag.items():
        tag_predictions = predictions[:, ix]
        tag_predictions = [1 if p >= 0.5 else 0 for p in tag_predictions]
        tag_ys = y_test[:, ix]
        r, p, f1 = rpf1(tag_ys, tag_predictions)
        count = sum(tag_ys)
        print(tag.ljust(10),
              str(count).rjust(4), "recall", rnd(r), "precision", rnd(p), "f1",
              rnd(f1))
        f1s.append(f1)
    mean_f1 = np.mean(f1s)
    print("MEAN F1: " + str(mean_f1))
    return mean_f1
Example #7
def test(epochs=1):

    # NOTE: the shuffled copies below are unused; the fit call trains on X_train directly
    ixs = range(len(X_train))
    random.shuffle(ixs)
    x_shf = X_train[ixs]
    y_shf = y_train[ixs]

    model.fit({
        "input": X_train,
        "output": y_train
    }, nb_epoch=epochs)  #64 seems good for now
    predictions = model.predict({"input": X_test, "output": y_test})["output"]
    print("Xp shape:", predictions.shape)
    f1s = []
    for ix, tag in ix2tag.items():
        tag_predictions = predictions[:, ix]
        tag_predictions = [1 if p >= 0.5 else 0 for p in tag_predictions]
        tag_ys = y_test[:, ix]
        r, p, f1 = rpf1(tag_ys, tag_predictions)
        count = sum(tag_ys)
        print(tag.ljust(10),
              str(count).rjust(4), "recall", rnd(r), "precision", rnd(p), "f1",
              rnd(f1))
        f1s.append(f1)
    mean_f1 = np.mean(f1s)
    print("MEAN F1: " + str(mean_f1))
    return mean_f1
Example #8
    def test_learner():

        instances = [
            (["a", "b", "c", "d"], 1),
            (["a", "b", "d", "c"], 1),
            (["c", "a", "b"], 1),
            (["a", "b"], 1),

            (["a", "b", "e"], 0),
            (["c", "b", "a"], 0),
            (["a", "c"], 0),
            (["a", "c"], 0),
            (["b", "c"], 0),
            (["b", "d"], 0),
            (["d"], 0),

        ]

        xs, ys = zip(*instances)

        learner = RegExLearner(precision, f1_score, 2.0)
        learner.fit(xs, ys)
        pred = learner.predict(xs)

        print_positives(xs, ys)
        r, p, f1 = rpf1(ys, pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(r, p, f1)
        print str(learner)
Example #9
def test(epochs=1):
    results = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_split=0.0, show_accuracy=True, verbose=1)
    probs = flatten(model.predict_proba(X_test, batch_size=batch_size))
    y_pred = [1 if p >= 0.5 else 0 for p in probs]
    r, p, f1 = rpf1(y_test, y_pred)
    print("recall", r, "precision", p, "f1", f1)
    return f1
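flatten in Example #9 is presumably a project helper that collapses the (n, 1) array returned by predict_proba into a flat sequence of probabilities; a stand-in with the same effect:

    import numpy as np

    def flatten(xs):
        # collapse an (n, 1) prediction array into a 1-D array
        return np.asarray(xs).ravel()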
Example #10
    def test_learner():

        instances = [
            (["a", "b", "c", "d"], 1),
            (["a", "b", "d", "c"], 1),
            (["c", "a", "b"], 1),
            (["a", "b"], 1),
            (["a", "b", "e"], 0),
            (["c", "b", "a"], 0),
            (["a", "c"], 0),
            (["a", "c"], 0),
            (["b", "c"], 0),
            (["b", "d"], 0),
            (["d"], 0),
        ]

        xs, ys = zip(*instances)

        learner = RegExLearner(precision, f1_score, 2.0)
        learner.fit(xs, ys)
        pred = learner.predict(xs)

        print_positives(xs, ys)
        r, p, f1 = rpf1(ys, pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(
            r, p, f1)
        print str(learner)
Example #11
def find_cutoff(y_test, predictions):
    scale = 20.0

    min_val = round(min(predictions))
    max_val = round(max(predictions))
    diff = max_val - min_val
    inc = diff / scale

    cutoff = -1
    best = -1
    for i in range(1, int(scale)+1, 1):
        val = min_val + inc * i  # offset by min_val so thresholds span [min_val, max_val]
        classes = [1 if p >= val else 0 for p in predictions]
        r, p, f1 = rpf1(y_test, classes)
        if f1 >= best:
            cutoff = val
            best = f1

    classes = [1 if p >= cutoff else 0 for p in predictions]
    r, p, f1 = rpf1(y_test, classes)
    return r, p, f1, cutoff
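A self-contained sketch of how find_cutoff might be exercised on toy probabilities. rpf1 is not shown in these snippets, so the stand-in below (assumed to match its (recall, precision, f1) return convention) computes the metrics directly:

    def rpf1(ys, preds):
        # minimal (recall, precision, f1) for binary labels
        tp = sum(1 for y, p in zip(ys, preds) if y == 1 and p == 1)
        fp = sum(1 for y, p in zip(ys, preds) if y == 0 and p == 1)
        fn = sum(1 for y, p in zip(ys, preds) if y == 1 and p == 0)
        r = tp / float(tp + fn) if tp + fn else 0.0
        p = tp / float(tp + fp) if tp + fp else 0.0
        f1 = 2 * p * r / (p + r) if p + r else 0.0
        return r, p, f1

    ys = [0, 0, 1, 1, 1]
    probs = [0.10, 0.40, 0.35, 0.80, 0.90]
    r, p, f1, cutoff = find_cutoff(ys, probs)
    print(r, p, f1, cutoff)  # sweeps 20 thresholds and keeps the best-F1 cutoff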
Example #12
def find_cutoff(y_test, predictions):
    scale = 20.0

    min_val = round(min(predictions))
    max_val = round(max(predictions))
    diff = max_val - min_val
    inc = diff / scale

    cutoff = -1
    best = -1
    for i in range(1, int(scale)+1, 1):
        val = min_val + inc * i  # offset by min_val so thresholds span [min_val, max_val]
        classes = [1 if p >= val else 0 for p in predictions]
        r, p, f1 = rpf1(y_test, classes)
        if f1 >= best:
            cutoff = val
            best = f1

    classes = [1 if p >= cutoff else 0 for p in predictions]
    r, p, f1 = rpf1(y_test, classes)
    return r, p, f1, cutoff
Example #13
def test(epochs=1):
    results = model.fit(X_train,
                        y_train,
                        batch_size=batch_size,
                        nb_epoch=epochs,
                        validation_split=0.0,
                        show_accuracy=True,
                        verbose=1)
    classes = flatten(model.predict_classes(X_test, batch_size=batch_size))
    r, p, f1 = rpf1(y_test, classes)
    print("recall", r, "precision", p, "f1", f1)
    return f1
Example #14
def test(epochs=1):
    results = model.fit(X_train,
                        y_train,
                        batch_size=batch_size,
                        nb_epoch=epochs,
                        validation_split=0.0,
                        show_accuracy=True,
                        verbose=1)
    #valid_probs = model.predict_proba(X_valid, batch_size=batch_size)
    test_probs = model.predict_proba(X_test, batch_size=batch_size)
    valid_f1s = []
    test_f1s = []
    test_f1s_50 = []
    cutoff = 0
    for ix, tag in ix2tag.items():
        #valid_tag_predictions = valid_probs[:, ix]
        test_tag_predictions = test_probs[:, ix]

        #valid_tag_ys = y_valid[:, ix]
        test_tag_ys = y_test[:, ix]

        #r_v, p_v, f1_v, cutoff = find_cutoff(valid_tag_ys, valid_tag_predictions)
        #valid_f1s.append(f1_v)

        #test_classes =      [1 if p >= cutoff else 0 for p in test_tag_predictions]
        test_classes_5050 = [
            1 if p >= 0.5 else 0 for p in test_tag_predictions
        ]

        #r, p, f1 = rpf1(test_tag_ys, test_classes)
        r50, p50, f150 = rpf1(test_tag_ys, test_classes_5050)
        #print("VALIDATION:", tag.ljust(35), str(sum(valid_tag_ys)).ljust(3), "recall", rnd(r_v), "precision", rnd(p_v), "f1", rnd(f1_v), "cutoff", rnd(cutoff))
        #print("TEST      :", tag.ljust(35), str(sum(test_tag_ys)).ljust(3),  "recall", rnd(r),   "precision", rnd(p),   "f1", rnd(f1),   "cutoff", rnd(cutoff))
        print("TEST 50/50:", tag.ljust(35),
              str(sum(test_tag_ys)).ljust(3), "recall", rnd(r50), "precision",
              rnd(p50), "f1", rnd(f150), "cutoff", rnd(cutoff))

        #test_f1s.append(f1)
        test_f1s_50.append(f150)

    #print("MEAN VALID F1       : " + str(np.mean(valid_f1s)))
    #print("MEAN TEST  F1       : " + str(np.mean(test_f1s)))
    print("MEAN TEST  F1 50/50 : " + str(np.mean(test_f1s_50)))
    # test_f1s is never populated while the cutoff search is commented out
    return np.mean(test_f1s_50)
Example #15
def test(epochs=1):

    # NOTE: the shuffled copies below are unused; the fit call trains on X_train directly
    ixs = range(len(X_train))
    random.shuffle(ixs)
    x_shf = X_train[ixs]
    y_shf = y_train[ixs]

    model.fit({"input": X_train, "output": y_train}, nb_epoch=epochs)#64 seems good for now
    predictions = model.predict({"input": X_test, "output": y_test})["output"]
    print("Xp shape:", predictions.shape)
    f1s = []
    for ix, tag in ix2tag.items():
        tag_predictions = predictions[:, ix]
        tag_predictions = [1 if p >= 0.5 else 0 for p in tag_predictions]
        tag_ys = y_test[:, ix]
        r, p, f1 = rpf1(tag_ys, tag_predictions)
        count = sum(tag_ys)
        print(tag.ljust(10), str(count).rjust(4), "recall", rnd(r), "precision", rnd(p), "f1", rnd(f1))
        f1s.append(f1)
    mean_f1 = np.mean(f1s)
    print("MEAN F1: " + str(mean_f1))
    return mean_f1
Example #16
    def test_learner_on_data():
        import GwData
        import WordTokenizer

        code = "50"

        data = GwData.GwData()
        xs = WordTokenizer.tokenize(data.documents, spelling_correct=False)
        ys = data.labels_for(code)

        def rule_score_fn(act_ys, predicted):
            return precision(act_ys, predicted) * (recall(act_ys, predicted) ** 0.5)

        learner = RegExLearner(precision, f1_score, 2.5)
        learner.fit(xs, ys)
        pred = learner.predict(xs)

        # TD Performance
        print_positives(xs, ys)
        r, p, f1 = rpf1(ys, pred)
        print "TD:\n\tRecall: {0}\n\tPrecision: {1}\n\tF1: {2}\n".format(r, p, f1)
        print str(learner)
Example #17
def rule_score_fn(act_ys, predicted):
    r, p, f1 = rpf1(act_ys, predicted)
    return r * (p ** 0.5)
Example #18
def test(epochs=1):
    results = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_split=0.0, show_accuracy=True, verbose=1)
    classes = flatten(model.predict_classes(X_test, batch_size=batch_size))
    r, p, f1 = rpf1(y_test, classes)
    print("recall", r, "precision", p, "f1", f1)
    return f1
Example #19
def rule_score_fn(act_ys, predicted):
    r, p, f1 = rpf1(act_ys, predicted)
    return r * (p ** 0.5)
Example #20
def score_fn(expected, actual):
    r, p, f1 = rpf1(expected, actual)
    return 1.0 - f1
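Example #20 turns F1 into a loss by returning 1.0 - f1, the usual trick when an optimiser expects lower-is-better. Under the stand-in rpf1 sketched after Example #11, a perfect prediction scores 0.0 and a fully disjoint one scores 1.0:

    print(score_fn([1, 0, 1], [1, 0, 1]))  # 0.0: perfect F1, zero loss
    print(score_fn([1, 0, 1], [0, 1, 0]))  # 1.0: no true positives, maximal loss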