Esempio n. 1
0
def get_avg_degree_by_department(g, nbr_direction='in'):
    '''Calculate the average edge weight per department prefix.

    One (edge, weight) pair is built for every neighbor in ``out_nbrs``:
    the edge is ``(course_id, neighbor)`` and the weight is the 'weight'
    entry of ``g.get_edge_data(course_id, neighbor)``. The weights are then
    grouped by the department prefix (``get_prefix``) of one edge endpoint
    and averaged per group.

    NOTE(review): ``course_id`` and ``out_nbrs`` are neither parameters nor
    locals of this function, so they must be resolvable from an enclosing
    scope at call time. Presumably they are the 'target' node and its
    out-neighbors (see node_department_out_degree and
    node_department_in_degree) -- confirm against the callers.

    params:
    - g                         graph-like object providing get_edge_data
    - nbr_direction (string)    'in' groups by edge[0], 'out' by edge[1]

    returns:
    - dict mapping department prefix -> mean weight of the edges in it

    raises:
    - ValueError if nbr_direction is neither 'in' nor 'out'
    '''
    # Resolve the direction once, up front. Previously an unknown direction
    # left the department variable unbound and failed inside the loop with
    # an UnboundLocalError.
    if nbr_direction == 'in':
        endpoint = 0  # neighbor -> target
    elif nbr_direction == 'out':
        endpoint = 1
    else:
        raise ValueError(
            "nbr_direction must be 'in' or 'out', got %r" % (nbr_direction,))

    # NOTE(review): edges are always built as (course_id, neighbor), so
    # with 'in' every edge is grouped under course_id's own prefix --
    # confirm this is intended.
    edge_data = [((course_id, i), g.get_edge_data(course_id, i)['weight'])
                 for i in out_nbrs]

    sum_count = defaultdict(int)
    sum_weight = defaultdict(float)
    for edge, weight in edge_data:
        nbr_department = get_prefix(edge[endpoint])
        sum_count[nbr_department] += 1
        sum_weight[nbr_department] += weight

    # Every key in sum_count has a count of at least 1, so the division
    # below cannot hit zero.
    return {
        department: sum_weight[department] / count
        for department, count in sum_count.items()
    }
Esempio n. 2
0
 def extract_departments(self):
     """Group the graph's nodes into sets keyed by department prefix.

     Stores the result in ``self.department_clusters`` as a plain dict
     mapping each ``get_prefix(course)`` value to the set of nodes of
     ``self.g`` that share that prefix. Returns nothing.
     """
     self.department_clusters = {}
     # Single pass over the nodes: each course is added to the set of its
     # own prefix, instead of rescanning every node once per prefix.
     for course in self.g.nodes:
         prefix = get_prefix(course)
         self.department_clusters.setdefault(prefix, set()).add(course)
Esempio n. 3
0
def extract_departments(g):
    """Group the nodes of ``g`` into sets keyed by department prefix.

    Args:
        g: object whose ``nodes`` attribute iterates over course nodes.

    Returns:
        A plain dict mapping each ``get_prefix(course)`` value to the set
        of nodes that share that prefix. Empty when ``g`` has no nodes.
    """
    department_clusters = {}
    # Single pass over the nodes: each course is added to the set of its
    # own prefix, instead of rescanning every node once per prefix.
    for course in g.nodes:
        department_clusters.setdefault(get_prefix(course), set()).add(course)
    return department_clusters
Esempio n. 4
0
def stopgap_nodes(g, department='', k=20):
    """Rank a department's courses by internal out-weight per enrolled count.

    For every node whose ``get_prefix`` equals ``department``, sums the
    'weight' of the boundary edges from that node into the department's
    nodes (``nx.edge_boundary``) and divides it by the 'count_course'
    attribute of the first edge in ``g.edges`` that ends at the node.
    Nodes with no such edge are skipped. A zero weight sum is replaced
    by 1 before dividing. Progress is printed for every node.

    Args:
        g: graph whose nodes are course identifiers.
        department: prefix selecting the courses to score.
        k: maximum number of results to return.

    Returns:
        Up to ``k`` ``(node_id, coefficient)`` tuples, highest coefficient
        first.
    """
    members = [course for course in g.nodes if department == get_prefix(course)]

    scored = []
    for node_id in members:
        print(node_id)
        # out degree leading into the department
        out_degree_internal = sum(
            boundary_edge[2]
            for boundary_edge in nx.edge_boundary(
                g, [node_id], members, data='weight')
        )

        # Only the first edge ending at this node is needed; it supplies
        # the 'count_course' attribute.
        sample_edge = next(
            (edge for edge in g.edges if edge[1] == node_id), None)
        if sample_edge is None:
            continue
        source, target = sample_edge[0], sample_edge[1]
        count = g.get_edge_data(source, target)['count_course']

        if out_degree_internal == 0:
            out_degree_internal = 1
        print(
            f"{int(out_degree_internal)} total {department} courses enrolled by {int(count)} students after course {node_id}"
        )
        scored.append((node_id, out_degree_internal / count))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]
Esempio n. 5
0
def num_courses(g, k=20):
    """Return the ``k`` department prefixes with the most courses.

    Args:
        g: object whose ``nodes`` attribute iterates over course nodes.
        k: maximum number of (prefix, count) pairs to return.

    Returns:
        A list of ``(prefix, count)`` tuples sorted by count, largest
        first, truncated to ``k`` entries.
    """
    counts = Counter(get_prefix(course) for course in g.nodes)
    # Sort the Counter's items. The previous code called ``.items()`` on
    # the function ``num_courses`` itself, which raised AttributeError.
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)[:k]
Esempio n. 6
0
def plot_pagerank_by_department():
    """Draw donut charts of total PageRank per department.

    For every (experiment directory, year range) pair, loads
    ``<experiment>/<year>/pagerank.json`` (a mapping of entry -> PageRank
    value), sums the values per department prefix, keeps the ``k`` largest
    departments and folds the rest into an 'OTHER' slice. Each pair gets
    one subplot; the figure is shown with ``plt.show()``.

    Entries whose name matches ``*LANG*`` or ``*LNG*`` are pooled under
    the single prefix 'LANG'; all others use ``get_prefix(entry)``.

    Returns nothing. Prints the sum of all PageRank values of each file.
    """
    k = 9
    years = ['2010-2018']  # ['2000-2008', '2005-2013', '2010-2018']
    experiments = ['experiments/final/pagerank_ug_counts'
                   ]  # 'experiments/final/pagerank_ba_counts']
    comparison_type = 'pagerank.json'

    plt.figure(figsize=(15, 10))
    the_grid = GridSpec(len(experiments), len(years))
    the_grid.update(wspace=0.025, hspace=0.05)
    for i, experiment_dir in enumerate(experiments):
        for j, year in enumerate(years):
            plt.subplot(the_grid[i, j], aspect=1)
            with open(os.path.join(experiment_dir, year, comparison_type),
                      'r') as f:
                d = json.load(f)

            departments = defaultdict(float)
            for entry, pagerank in d.items():
                # Match the entry name itself. fnmatch.filter() expects a
                # list of names; given a string it tested each single
                # character, so the language grouping never triggered.
                is_lang = (fnmatch.fnmatch(entry, '*LANG*')
                           or fnmatch.fnmatch(entry, '*LNG*'))
                prefix = 'LANG' if is_lang else get_prefix(entry)
                departments[prefix] += pagerank

            departments = sorted(departments.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
            top_k = departments[:k]
            other = [
                ('OTHER',
                 sum(pagerank for _, pagerank in departments[k:]))
            ]

            groups = (top_k + other)
            labels, values = zip(*groups)
            print(sum(d.values()))
            # NOTE(review): presumably the x10 scaling keeps the wedge sum
            # above 1 so that plt.pie normalizes the values -- confirm.
            values = np.array(values) * 10

            # A white disc drawn over the pie's center turns it into a
            # donut.
            my_circle = plt.Circle((0, 0), 0.4, color='white')
            plt.pie(values,
                    labels=labels,
                    colors=sns.light_palette(
                        '#99CCFF' if i == 0 else '#FC0E1C',
                        n_colors=k + 1,
                        reverse=True),
                    wedgeprops={
                        "edgecolor": "white",
                        'linewidth': 3,
                        'antialiased': True
                    },
                    textprops={'fontsize': 10})
            p = plt.gcf()
            p.gca().add_artist(my_circle)
            # Only the first row of subplots carries the year as title.
            if i == 0:
                plt.title(year)

    # plt.savefig('figures/figures_pdf/2010-2018-pr.pdf')
    plt.show()
Esempio n. 7
0
 def num_courses(self):
     """Return the ``self.top_k`` department prefixes with the most courses.

     Counts the nodes of ``self.g`` per ``get_prefix`` value and returns a
     list of ``(prefix, count)`` tuples sorted by count, largest first,
     truncated to ``self.top_k`` entries.
     """
     counts = Counter(get_prefix(course) for course in self.g.nodes)
     # Sort the Counter's items. The previous code called ``.items()`` on
     # the name ``num_courses`` instead of on the Counter, which failed at
     # runtime.
     return sorted(
         counts.items(), key=lambda x: x[1], reverse=True
     )[:self.top_k]