Ejemplo n.º 1
0
def _get_tree():
    """Return the JSON payload used to draw a tree on the 'Create Tree' page.

    Reads the ``name`` form field, upper-cases it and removes spaces, then
    responds with ``{'data': value}`` where ``value`` is one of:

    * the result of ``graph_department`` (department code) or
      ``create_tree`` (course code with prerequisites or co-requisites);
    * the bare code itself when the request came from the search bar;
    * ``'ND <code>'`` -- the code is not a department;
    * ``'NP <code>'`` -- the course has no prerequisites or co-requisites;
    * ``'NA <code>'`` -- the code is not available (``''`` if it is empty).
    """
    # SEARCHCOURSE is a flag appended to the name when the request came from
    # the search bar.
    search_flag = 'SEARCHCOURSE'
    course = request.form['name'].upper().replace(' ', '')
    search_course = course.endswith(search_flag)
    if search_course:
        # Strip only the trailing marker. Removing the first occurrence
        # anywhere in the string would corrupt a name that happens to
        # contain the marker text before the suffix (or without the suffix).
        course = course[:-len(search_flag)]

    # Letters without a three-digit number are treated as a department code.
    if re.search(r'[A-Z]+', course) and not re.search(r'\d{3}', course):
        if any(course in codes for codes in UW_DEPARTMENTS.values()):
            if search_course:
                img = course
            else:
                img = graph_department(
                    CATALOGS[CATALOGS['Department Name'] == course], course,
                    request.url_root)
        else:
            # ND -> Not a Department
            img = f'ND {course}'
    elif course in CATALOGS.index:
        has_requisites = (CATALOGS.loc[course, 'Prerequisites']
                          or CATALOGS.loc[course, 'Co-Requisites'])
        if has_requisites:
            if search_course:
                img = course
            else:
                img = create_tree(CATALOGS, course, request.url_root)
        else:
            # NP -> No Prerequisites
            img = f'NP {course}'
    else:
        # NA -> Not Available
        img = f'NA {course}' if course else ''
    return jsonify({'data': img})
def prune_validation(data, n_folds=10):
    '''
    Performs complete evaluation of the decision tree algorithm with pruning

    Here are the steps:
    1. Split data into TEST and TRAINING+VALIDATION (x n_folds times)
        2. Split TRAINING+VALIDATION into TRAINING and VALIDATION
           (x n_folds - 1 times)
            3. For each TRAINING and VALIDATION:
                a) Train a tree using TRAINING
                b) Prune a tree using VALIDATION
                c) Test each pruned tree using TEST

    With the default of 10 folds this yields 9 trees x 10 test datasets = 90
    measures.

    :param data: full dataset (clean_dataset OR noisy_dataset)
    :param n_folds: number of folds to divide the data into; must be at
                    least 3 so that a non-empty TRAINING set remains once the
                    TEST and VALIDATION folds are held out
    :return: list of the values returned by evaluate(), one per pruned tree,
             ordered by test fold and then by validation fold
             (n_folds * (n_folds - 1) entries)
    '''

    # Shuffle and divide data
    divided_data = divide_data(data, n_folds)

    all_measures = []

    for test_idx in range(n_folds):
        # Split TEST and TRAINING+VALIDATION
        test_data = divided_data[test_idx]

        # Split TRAINING+VALIDATION ---> TRAINING and VALIDATION
        for offset in range(1, n_folds):
            validation_idx = (test_idx + offset) % n_folds
            validation_data = divided_data[validation_idx]

            # Select the training folds by position. Comparing folds by
            # content fails when folds have different shapes, and would also
            # drop any fold whose content equals a held-out fold.
            held_out = (test_idx, validation_idx)
            training_data = np.concatenate([
                fold for k, fold in enumerate(divided_data)
                if k not in held_out
            ])

            # Train a tree
            tree = create_tree(training_data)
            decision_tree_learning(tree)

            # Prune tree on VALIDATION
            pruned_tree = prune(tree, validation_data)

            # Evaluate the pruned tree on TEST once and collect the measures
            all_measures.append(evaluate(test_data, pruned_tree))

    return all_measures
Ejemplo n.º 3
0
def cross_validation(data, n_folds=10):
    '''
    Performs cross validation of the (unpruned) decision tree algorithm.

    The data is divided into n_folds folds. Each fold is used once as TEST
    data while a tree is trained on the concatenation of the remaining folds
    and then evaluated on the TEST fold.

    :param data: full dataset to divide into folds
    :param n_folds: number of folds to divide the data into; must be at
                    least 2 so that a non-empty TRAINING set remains
    :return: list with the value returned by evaluate() for each test fold,
             in fold order (no averaging is done here)
    '''

    divided_data = divide_data(data, n_folds)

    all_measures = []

    for test_idx in range(n_folds):
        test_data = divided_data[test_idx]

        # Select the training folds by position. Comparing folds by content
        # fails when folds have different shapes, and would also drop any
        # fold whose content equals the test fold.
        training_data = np.concatenate([
            fold for k, fold in enumerate(divided_data) if k != test_idx
        ])

        tree = create_tree(training_data)
        learned_tree = decision_tree_learning(tree)

        all_measures.append(evaluate(test_data, learned_tree))

    return all_measures
Ejemplo n.º 4
0
def search():
    """Handle the search bar in the right corner of the nav bar.

    The submitted ``name`` is upper-cased and stripped of spaces. When it is
    a known course (present in ``CATALOGS.index``) the course page is
    rendered together with its prerequisite tree; otherwise the user is
    redirected to the index page.
    """
    query = request.form['name'].upper().replace(' ', '')

    # Unknown course: nothing to show, go back to the landing page.
    if query not in CATALOGS.index:
        return redirect(url_for('index'))

    # Build the prerequisite tree shown on the course page.
    tree_svg = create_tree(CATALOGS, query, request.url_root)
    details = CATALOGS.loc[query].to_dict()
    return render_template('course.html',
                           svg=tree_svg,
                           course=query,
                           course_data=details)
Ejemplo n.º 5
0
def predict(title_tree):
    """Predict a class index and its confidence for a group of parsed titles.

    Each item of ``title_tree`` is indexed as ``item[0]`` (title) and
    ``item[1]`` (tree). The first value returned by ``create_graph`` for every
    tree is summed and divided by the number of items; the resulting vector
    is projected with ``nt.w`` / ``nt.b`` and passed through ``softmax``.

    :param title_tree: sized iterable of (title, tree) items
    :return: tuple ``(index of the highest score, highest score)``
    """
    # The current title is published through the module-level name ``title``
    # on every iteration.
    # NOTE(review): presumably read by create_graph -- confirm.
    global title

    hidden_sum = np.zeros(hidden_size)
    for item in title_tree:
        title = item[0]
        built_tree = ct.create_tree(item[1])
        hidden, _unused = create_graph(built_tree)
        hidden_sum += hidden

    mean_hidden = hidden_sum / (len(title_tree))
    probabilities = softmax(np.dot(nt.w, mean_hidden) + nt.b)
    return np.argmax(probabilities), np.max(probabilities)
Ejemplo n.º 6
0
def department(department):
    """Render either a department page or a course page.

    POST requests are redirected to the index. For other requests the
    upper-cased ``department`` argument is treated as a course code when it
    contains letters immediately followed by digits, and as a department
    code otherwise.

    :param department: department or course code
    :return: the rendered 'course.html' or 'department.html' template, or a
             redirect to the index page for POST requests
    """
    if request.method == 'POST':
        return redirect(url_for('index'))

    department = department.upper()

    if re.search(r'[A-Z]+\d+', department):
        # Course code: draw its tree and, when the course is known, keep only
        # the related course codes that exist in the catalog.
        svg = create_tree(CATALOGS, department, request.url_root)
        in_dict = department in CATALOGS.index
        course_data = None
        if in_dict:
            course_data = CATALOGS.loc[department].to_dict()
            split_course = re.compile(r'/|,|&&|;')
            for field in ['Prerequisites', 'Co-Requisites', 'Offered with']:
                course_data[field] = [
                    code for code in split_course.split(course_data[field])
                    if code in CATALOGS.index and code != 'POI'
                ]
        return render_template('course.html',
                               svg=svg,
                               course=department,
                               course_data=course_data,
                               in_dict=in_dict,
                               url=request.url_root)

    # Department code.
    # NOTE(review): this replace is a no-op as written; it looks like an HTML
    # entity was unescaped at some point -- confirm the intended pattern.
    department = department.replace('&', '&')
    department_df = CATALOGS[CATALOGS['Department Name'] == department]
    svg = graph_department(department_df, department, request.url_root)

    # Map '<Department Name><Course Number>' to the full course record.
    department_chosen = {}
    if not department_df.empty:
        department_chosen = {
            f"{row['Department Name']}{row['Course Number']}": row
            for row in department_df.to_dict(orient='records')
        }

    # Flatten every group of UW_DEPARTMENTS into a single mapping.
    all_departments = {
        code: description
        for group in UW_DEPARTMENTS.values()
        for code, description in group.items()
    }
    return render_template('department.html',
                           course_dict=department_chosen,
                           department_dict=all_departments,
                           url=request.url_root,
                           department=department,
                           in_dict=bool(department_chosen),
                           svg=svg)
Ejemplo n.º 7
0
import pandas as pd
import numpy as np
from create_tree import create_tree

# Load the churn dataset and remove four columns before building the tree.
customerDf = pd.read_csv("datasets/churn.data.simple.noDot")
customerDf.drop(
    ["state", "area_code", "international_plan", "voice_mail_plan"],
    axis=1,
    inplace=True,
)

tree = create_tree(customerDf)
# The call form of print works on both Python 2 and Python 3; the bare
# `print tree` statement is a SyntaxError on Python 3.
print(tree)
Ejemplo n.º 8
0
                label = node.label
                plt.text(node_x, node_y, f"{label}", size=10,
                    ha="center", va="center",
                    bbox=dict(boxstyle="round",
                            ec=(1, 0.5, 0.5),
                            fc=(1, 0.8, 0.8),))

    plt.show()



if __name__ == '__main__':
    # Demo: train one tree on a single TRAINING split and plot it.
    n_folds = 10

    # Shuffle and divide data
    divided_data = divide_data(clean_dataset, n_folds)  # shuffles then divides data
    i = 0
    j = 1
    validation_index = (i + j) % n_folds

    # Split the test data
    test_data = divided_data[i]

    # Split the data. Training folds are selected by position: comparing
    # folds by content fails when folds have different shapes, and would
    # also drop any fold whose content equals a held-out fold.
    validation_data = divided_data[validation_index]
    training_data = np.concatenate([
        fold for k, fold in enumerate(divided_data)
        if k not in (i, validation_index)
    ])

    # Train a tree
    tree = create_tree(training_data)
    decision_tree_learning(tree)

    plot_tree(tree)