def get_avg_degree_by_department(g, nbr_direction='in'):
    '''Average the edge weights around a target node, grouped by department.

    The edges considered are (course_id, i) for every i in out_nbrs, each
    weighted by g.get_edge_data(course_id, i)['weight']. Every edge is
    attributed to the department prefix of one of its endpoints (chosen by
    nbr_direction) and the weights are averaged per department.

    NOTE(review): course_id and out_nbrs are neither parameters nor locals
    of this function. They have to be resolvable from an enclosing or
    module scope, otherwise the call fails with NameError -- confirm
    against node_department_out_degree / node_department_in_degree, which
    look like the intended callers.

    params:
        - g: object exposing get_edge_data(u, v) that returns a mapping
          with a 'weight' entry (presumably a networkx graph -- confirm)
        - nbr_direction (string): 'in' takes the department from the first
          endpoint of each edge (neighbor -> target), 'out' takes it from
          the second endpoint (target -> neighbor)

    returns:
        dict mapping department prefix -> mean weight of its edges

    raises:
        ValueError if nbr_direction is neither 'in' nor 'out'
    '''
    # Resolve the endpoint index once, and reject unknown directions before
    # doing any work; previously an unknown value left the department name
    # unbound inside the loop.
    if nbr_direction == 'in':
        nbr_index = 0  # neighbor -> target
    elif nbr_direction == 'out':
        nbr_index = 1  # target -> neighbor
    else:
        raise ValueError(
            f"nbr_direction must be 'in' or 'out', got {nbr_direction!r}")

    edge_data = [((course_id, i), g.get_edge_data(course_id, i)['weight'])
                 for i in out_nbrs]

    sum_count = defaultdict(int)
    sum_weight = defaultdict(float)
    for edge, weight in edge_data:
        nbr_department = get_prefix(edge[nbr_index])
        sum_count[nbr_department] += 1
        sum_weight[nbr_department] += weight

    # Every key in sum_count was incremented at least once, so the division
    # below can never hit a zero count.
    return {
        department: sum_weight[department] / sum_count[department]
        for department in sum_count
    }
def extract_departments(self):
    """Group the nodes of self.g by department prefix.

    Stores the result on self.department_clusters as a dict mapping each
    prefix returned by get_prefix(course) to the set of nodes that share
    that prefix. Returns None.
    """
    # One pass over the nodes: each course is filed directly under its own
    # prefix instead of rescanning every node once per prefix.
    clusters = {}
    for course in self.g.nodes:
        clusters.setdefault(get_prefix(course), set()).add(course)
    self.department_clusters = clusters
def extract_departments(g):
    """Group the nodes of g by department prefix.

    params:
        - g: object whose ``nodes`` attribute iterates over course ids

    returns:
        dict mapping each prefix returned by get_prefix(course) to the set
        of nodes that share that prefix (empty dict when g has no nodes)
    """
    # One pass over the nodes: each course is filed directly under its own
    # prefix instead of rescanning every node once per prefix.
    department_clusters = {}
    for course in g.nodes:
        department_clusters.setdefault(get_prefix(course), set()).add(course)
    return department_clusters
def stopgap_nodes(g, department='', k=20):
    """Rank the courses of one department by an internal follow-up ratio.

    For every node whose prefix equals ``department`` the function sums the
    'weight' of the edges going from that node to nodes of the same
    department, divides the sum by the 'count_course' attribute of the
    first edge in g.edges that points at the node, and reports the ratio.

    params:
        - g: graph whose nodes are course ids (passed to nx.edge_boundary)
        - department (string): prefix selecting the nodes to score
        - k (int): maximum number of results

    returns:
        list of at most k (node_id, coefficient) tuples, highest
        coefficient first; nodes without any incoming edge are left out
    """
    members = [n for n in g.nodes if get_prefix(n) == department]

    scored = []
    for node_id in members:
        print(node_id)

        # Total weight of the edges from this course into its own department.
        boundary = nx.edge_boundary(g, [node_id], members, data='weight')
        out_degree_internal = sum(edge[2] for edge in boundary)

        # The first edge arriving at this course carries the enrollment
        # count; a course nobody arrives at cannot be scored.
        incoming = next((e for e in g.edges if e[1] == node_id), None)
        if incoming is None:
            continue
        count = g.get_edge_data(incoming[0], incoming[1])['count_course']

        # A zero total is reported (and scored) as one.
        out_degree_internal = out_degree_internal or 1
        print(
            f"{int(out_degree_internal)} total {department} courses enrolled by {int(count)} students after course {node_id}"
        )
        scored.append((node_id, out_degree_internal / count))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]
def num_courses(g, k=20):
    """Return the k departments that own the most courses in g.

    params:
        - g: object whose ``nodes`` attribute iterates over course ids
        - k (int): maximum number of departments to return

    returns:
        list of (prefix, course_count) tuples sorted by descending count,
        truncated to the first k entries
    """
    counts = Counter(get_prefix(course) for course in g.nodes)
    # Sort the Counter's own items; the previous code called .items() on
    # the function object and raised AttributeError.
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:k]
def plot_pagerank_by_department():
    """Draw a donut chart of total pagerank per department.

    For every (experiment directory, year range) pair configured below the
    function loads ``<experiment>/<year>/pagerank.json`` (a mapping of
    entry -> pagerank), sums the pagerank per department prefix, keeps the
    k largest departments, folds the rest into an 'OTHER' slice and draws
    the result as a pie with a white disc over its center. Entries whose
    name contains 'LANG' or 'LNG' are all counted under 'LANG'.

    Takes no arguments and returns None; the figure is shown with
    plt.show().
    """
    k = 9
    years = ['2010-2018']  # ['2000-2008', '2005-2013', '2010-2018']
    experiments = ['experiments/final/pagerank_ug_counts'
                   ]  # 'experiments/final/pagerank_ba_counts']
    comparison_type = 'pagerank.json'

    plt.figure(figsize=(15, 10))
    the_grid = GridSpec(len(experiments), len(years))
    the_grid.update(wspace=0.025, hspace=0.05)

    for i, experiment_dir in enumerate(experiments):
        for j, year in enumerate(years):
            plt.subplot(the_grid[i, j], aspect=1)
            with open(os.path.join(experiment_dir, year, comparison_type),
                      'r') as f:
                d = json.load(f)

            departments = defaultdict(float)
            for entry, pagerank in d.items():
                # Match the whole entry name. fnmatch.filter() expects a
                # list of names; handed a single string it tests each
                # character separately and therefore never matched.
                is_lang = (fnmatch.fnmatch(entry, '*LANG*')
                           or fnmatch.fnmatch(entry, '*LNG*'))
                prefix = 'LANG' if is_lang else get_prefix(entry)
                departments[prefix] += pagerank

            departments = sorted(departments.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
            top_k = departments[:k]
            other = [('OTHER',
                      sum(pagerank for _, pagerank in departments[k:]))]
            groups = top_k + other
            labels, values = zip(*groups)

            # Total pagerank in the file, printed as a sanity check.
            print(sum(d.values()))
            # NOTE(review): the x10 scaling presumably keeps the slice sum
            # above 1 so matplotlib normalizes the wedges -- confirm.
            values = np.array(values) * 10

            my_circle = plt.Circle((0, 0), 0.4, color='white')
            plt.pie(values,
                    labels=labels,
                    colors=sns.light_palette(
                        '#99CCFF' if i == 0 else '#FC0E1C',
                        n_colors=k + 1,
                        reverse=True),
                    wedgeprops={
                        "edgecolor": "white",
                        'linewidth': 3,
                        'antialiased': True
                    },
                    textprops={'fontsize': 10})
            # The white disc over the center turns the pie into a donut.
            p = plt.gcf()
            p.gca().add_artist(my_circle)
            if i == 0:
                plt.title(year)

    # plt.savefig('figures/figures_pdf/2010-2018-pr.pdf')
    plt.show()
def num_courses(self):
    """Return the self.top_k departments that own the most courses.

    Counts the nodes of self.g per prefix returned by get_prefix.

    returns:
        list of (prefix, course_count) tuples sorted by descending count,
        truncated to the first self.top_k entries
    """
    counts = Counter(get_prefix(course) for course in self.g.nodes)
    # Sort the Counter's own items; the previous code called .items() on
    # the bare name num_courses instead of the Counter.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:self.top_k]