Ejemplos de jaccard en Python, ejemplos de trajectory.utils.vector.jaccard en Python

Ejemplo n.º 1

0

Mostrar archivo

def evaluation(u=None, d=None):
    """Render prediction-quality metrics for one department's courses.

    Args:
        u: University abbreviation, matched against
            ``University.abbreviation``.
        d: Department abbreviation, matched against
            ``Department.abbreviation``.

    Returns:
        The rendered "evaluate_landing.html" template when either
        argument is None; otherwise the rendered
        "evaluate_department.html" template, which receives the
        department and a ``knowledge_areas`` dict with the keys
        'predicted', 'truth', 'jaccard' and 'percent', each mapping
        course ids to values.

    Calls ``abort(404)`` when no department matches ``u`` and ``d``.
    """
    if u is None or d is None:
        return render_template("evaluate_landing.html")

    department = app.db.query(Department).join(University) \
            .filter(University.abbreviation==u) \
            .filter(Department.abbreviation==d) \
            .first()
    if department is None:
        abort(404)  # department not found

    courses = department.courses

    # Retrieve the predicted and ground truth knowledge area labels for
    # each course.
    try:
        predicted = {
            course.id:
            predicted_knowledge_areas(course, result_set=g.result_set_raw)
            for course in courses
        }
        truth = {
            course.id: ground_truth_knowledge_areas(course)
            for course in courses
        }
    except RuntimeError:
        # Fall back to empty label lists for every course if either
        # lookup fails; the page then renders without any scores.
        predicted = {course.id: [] for course in courses}
        truth = {course.id: [] for course in courses}

    # Score each course in a single pass. Courses with no ground truth
    # labels are left out of both metrics: there is nothing to compare
    # against, and the percentage would divide by zero.
    jaccard_scores = {}
    percent_scores = {}
    for course in courses:
        expected = truth[course.id]
        if not expected:
            continue
        guessed = predicted[course.id]
        jaccard_scores[course.id] = float(jaccard(guessed, expected))
        overlap = set(guessed).intersection(expected)
        # Convert before dividing so the ratio is always a true division.
        percent_scores[course.id] = float(len(overlap)) / len(expected)

    knowledge_areas = {
        'predicted': predicted,
        'truth': truth,
        'jaccard': jaccard_scores,
        'percent': percent_scores,
    }

    return render_template(
        "evaluate_department.html",
        department=department,
        knowledge_areas=knowledge_areas,
    )

Ejemplo n.º 2

0

Mostrar archivo

Archivo: web.py Proyecto: jrouly/trajectory

def evaluation(u=None, d=None):
    """Show how well predicted knowledge areas match the ground truth.

    ``u`` is a university abbreviation and ``d`` a department
    abbreviation; together they select the department to evaluate.

    If either argument is None the landing template is rendered. If no
    department matches, ``abort(404)`` is called. Otherwise the
    department template is rendered with the department and a
    ``knowledge_areas`` dict holding, per course id, the 'predicted'
    labels, the 'truth' labels, and the 'jaccard' and 'percent' scores
    (scores exist only for courses that have ground truth labels).
    """
    if u is None or d is None:
        return render_template("evaluate_landing.html")

    department = app.db.query(Department).join(University) \
            .filter(University.abbreviation==u) \
            .filter(Department.abbreviation==d) \
            .first()
    if department is None:
        abort(404) # department not found

    # Retrieve the set of predicted and ground truth knowledge area labels
    # for each course.
    try:
        knowledge_areas = {
            'predicted': {
                course.id: predicted_knowledge_areas(
                    course,
                    result_set=g.result_set_raw)
                for course in department.courses
            },
            'truth': {
                course.id: ground_truth_knowledge_areas(course)
                for course in department.courses
            },
        }
    except RuntimeError:
        # Return empty knowledge area lists if an error is encountered.
        knowledge_areas = {
            'predicted': {course.id: [] for course in department.courses},
            'truth': {course.id: [] for course in department.courses},
        }

    predicted = knowledge_areas['predicted']
    truth = knowledge_areas['truth']

    # Only courses with at least one ground truth label can be scored;
    # decide that once and reuse the list for both metrics.
    scorable = [course for course in department.courses if truth[course.id]]

    # Calculate the jaccard coefficient and percentage correct of the
    # prediction/truth sets, use these as 'correctness' metrics.
    knowledge_areas['jaccard'] = {
        course.id: float(jaccard(predicted[course.id], truth[course.id]))
        for course in scorable
    }
    knowledge_areas['percent'] = {
        # Fraction of ground truth labels that were also predicted;
        # float() is applied before dividing to force true division.
        course.id:
            float(len(set(predicted[course.id]) & set(truth[course.id])))
            / len(truth[course.id])
        for course in scorable
    }

    return render_template("evaluate_department.html",
            department=department,
            knowledge_areas=knowledge_areas,)

Ejemplo n.º 3

0

Mostrar archivo

def compare_departments(daid=None, dbid=None):
    """Render a topic comparison between two departments.

    Args:
        daid: Id of the first department.
        dbid: Id of the second department.

    Returns:
        The rendered "compare_departments_landing.html" template (with
        every department listed) when either id is None; otherwise the
        rendered "compare_departments.html" template.

    Calls ``abort(404)`` when either department lookup returns None or
    when ``g.result_set_raw`` is None.
    """

    def as_bit_string(vector):
        # Render a topic vector as a text string of '1'/'0' characters.
        return vector.unpack(one=b'1', zero=b'0').decode('utf-8')

    def course_count(department_id):
        # Count the courses joined to the department with this id.
        return app.db.query(Course).join(Department) \
                .filter(Department.id==department_id).count()

    # Nothing selected yet: show the picker listing every department.
    if None in (daid, dbid):
        return render_template("compare_departments_landing.html",
                               departments=app.db.query(Department).all())

    dept_a = app.db.query(Department).get(daid)
    dept_b = app.db.query(Department).get(dbid)

    # A missing department, or a missing result set (meaning no topics
    # to infer), leaves nothing to compare.
    both_found = dept_a is not None and dept_b is not None
    if not both_found or g.result_set_raw is None:
        abort(404)

    # Topic sets per department.
    topics_a = set(topic_list(dept_a, g.result_set_raw))
    topics_b = set(topic_list(dept_b, g.result_set_raw))

    # Topic vectors per department, plus their printable forms.
    vector_a = topic_vector(dept_a, g.result_set_raw)
    vector_b = topic_vector(dept_b, g.result_set_raw)
    bits_a = as_bit_string(vector_a)
    bits_b = as_bit_string(vector_b)

    # Similarity metrics; the jaccard score uses the full topic sets,
    # before the shared topics are split out below.
    similarity = {
        'jaccard': {
            'name': 'Jaccard Index',
            'range': '[0, 1]',
            'description': 'Comparative set cardinality.',
            'value': jaccard(topics_a, topics_b),
        },
        'cosine': {
            'name': 'Cosine Similarity',
            'range': '[-1, 1]',
            'description': 'Geometric cosine distance.',
            'value': cosine_similarity(vector_a, vector_b),
        },
        'euclidean': {
            'name': 'Euclidean Distance',
            'description': 'Geometric vector distance.',
            'value': euclidean_distance(vector_a, vector_b),
        },
    }

    # Split the topics into shared ones and those unique to each side.
    shared_topics = topics_a.intersection(topics_b)
    only_a = topics_a.difference(shared_topics)
    only_b = topics_b.difference(shared_topics)

    courses_in_a = course_count(daid)
    courses_in_b = course_count(dbid)

    # Every department, so the page can offer switching to another pair.
    all_departments = app.db.query(Department).all()

    return render_template(
        "compare_departments.html",
        da=dept_a,
        db=dept_b,
        da_topics=only_a,
        db_topics=only_b,
        num_courses_a=courses_in_a,
        num_courses_b=courses_in_b,
        common_topics=shared_topics,
        departments=all_departments,
        similarity_metrics=similarity,
        da_vector=bits_a,
        db_vector=bits_b,
    )

Ejemplo n.º 4

0

Mostrar archivo

Archivo: plot-ka-correctness.py Proyecto: jrouly/trajectory

                course.id: predicted_knowledge_areas(course, rs)
                for course in gmu_cs.courses
            },
            'truth': {
                course.id: ground_truth_knowledge_areas(course)
                for course in gmu_cs.courses
            },
        } for rs in result_sets
]
print("Done.")

print("Calculate jaccard and percent metrics...")
for ka_dict in knowledge_areas:
    predicted_by_course = ka_dict['predicted']
    truth_by_course = ka_dict['truth']

    jaccard_scores = {}
    percent_scores = {}
    for course in gmu_cs.courses:
        predicted = predicted_by_course[course.id]
        truth = truth_by_course[course.id]

        # A course is scored only when both label collections exist.
        # The same guard now covers 'percent' too: previously a None
        # prediction with non-empty truth reached set(None) and raised
        # TypeError.
        if predicted is None or truth is None:
            continue

        jaccard_scores[course.id] = jaccard(predicted, truth)

        # 'percent' is the share of ground truth labels that were also
        # predicted; skipped for empty truth to avoid dividing by zero.
        if truth:
            matched = set(predicted).intersection(truth)
            percent_scores[course.id] = len(matched) / len(truth)

    ka_dict['jaccard'] = jaccard_scores
    ka_dict['percent'] = percent_scores
print("Done.")

Ejemplo n.º 5

0

Mostrar archivo

Archivo: 10by10.py Proyecto: jrouly/trajectory

def compare(depA, depB, rs):
    """Return the jaccard score of the topic lists of two departments.

    Both topic lists are looked up with ``topic_list`` against the same
    result set ``rs``, first for ``depA`` and then for ``depB``.
    """
    return jaccard(topic_list(depA, rs), topic_list(depB, rs))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: web.py Proyecto: jrouly/trajectory

def compare_departments(daid=None, dbid=None):
    """Build the department-vs-department comparison page.

    ``daid`` and ``dbid`` are the ids of the two departments to compare.
    When either is None, the landing template is rendered with the full
    department list. When either lookup returns None, or when
    ``g.result_set_raw`` is None, ``abort(404)`` is called. Otherwise
    the comparison template is rendered with both departments, their
    unique and common topics, course counts, similarity metrics and
    topic vector strings.
    """
    if None in (daid, dbid):
        return render_template(
            "compare_departments_landing.html",
            departments=app.db.query(Department).all(),
        )

    # Look up both requested departments, first A and then B.
    department_a, department_b = (
        app.db.query(Department).get(department_id)
        for department_id in (daid, dbid)
    )

    # Simply 404 if a department is missing or there is no result set
    # (meaning no topics to infer).
    if department_a is None or department_b is None:
        abort(404)
    elif g.result_set_raw is None:
        abort(404)

    # One topic set per department.
    a_topics = set(topic_list(department_a, g.result_set_raw))
    b_topics = set(topic_list(department_b, g.result_set_raw))

    # One topic vector per department, each also rendered as a string
    # of '1' and '0' characters for display.
    a_vector = topic_vector(department_a, g.result_set_raw)
    b_vector = topic_vector(department_b, g.result_set_raw)
    vector_strings = [
        vector.unpack(one=b'1', zero=b'0').decode('utf-8')
        for vector in (a_vector, b_vector)
    ]

    # Similarity metrics, keyed by short name in display order.
    similarity = {}
    similarity['jaccard'] = dict(
        name='Jaccard Index',
        range='[0, 1]',
        description='Comparative set cardinality.',
        value=jaccard(a_topics, b_topics),
    )
    similarity['cosine'] = dict(
        name='Cosine Similarity',
        range='[-1, 1]',
        description='Geometric cosine distance.',
        value=cosine_similarity(a_vector, b_vector),
    )
    similarity['euclidean'] = dict(
        name='Euclidean Distance',
        description='Geometric vector distance.',
        value=euclidean_distance(a_vector, b_vector),
    )

    # Topics both departments share, and what is left on each side
    # once those are removed.
    common = a_topics & b_topics
    a_only = a_topics - common
    b_only = b_topics - common

    # Course count for each department, in A-then-B order.
    course_counts = []
    for department_id in (daid, dbid):
        course_counts.append(
            app.db.query(Course).join(Department)
            .filter(Department.id==department_id).count()
        )

    # Assemble the template context; the department list lets the page
    # switch over to a different pair.
    context = {
        'da': department_a,
        'db': department_b,
        'da_topics': a_only,
        'db_topics': b_only,
        'num_courses_a': course_counts[0],
        'num_courses_b': course_counts[1],
        'common_topics': common,
        'departments': app.db.query(Department).all(),
        'similarity_metrics': similarity,
        'da_vector': vector_strings[0],
        'db_vector': vector_strings[1],
    }
    return render_template("compare_departments.html", **context)