def create_department_topic_matrix(self):
    """Print one tab-separated row of summed topic weights per department.

    Each document contributes only its highest-weighted topic. A document
    is credited to every department whose entry in
    ``departments.get_departments()`` contains the document's title. For
    each department the contributed weights are summed per topic and laid
    out as ``[department, weight_0, ..., weight_{NUM_TOPICS-1}]``; topics
    that received nothing stay at ``0``.

    Nothing is returned; the rows are written to stdout.
    """
    doc_topic = self.model.transform(self.tf)  # document-topic matrix
    titles = KeystractExtractor.get_doc_titles()
    dept_members = departments.get_departments()

    # department -> [(dominant topic index, weight), ...], one pair per
    # document credited to that department, in document order.
    dominant_by_dept = {}
    for doc_idx, weights in enumerate(doc_topic):
        weights = list(weights)
        peak = max(weights)
        dominant = (weights.index(peak), peak)
        for dept in dept_members:
            if titles[doc_idx] in dept_members[dept]:
                dominant_by_dept.setdefault(dept, []).append(dominant)

    # Build every row before printing anything.
    rows = []
    for dept, pairs in dominant_by_dept.items():
        # topic index -> weights contributed to it, in document order
        weights_by_topic = {}
        for topic, weight in pairs:
            weights_by_topic.setdefault(topic, []).append(weight)

        row = [dept] + [0] * NUM_TOPICS
        for topic, topic_weights in weights_by_topic.items():
            # Topic indices outside the NUM_TOPICS columns are dropped.
            if topic < NUM_TOPICS:
                row[topic + 1] = sum(topic_weights)
        rows.append(row)

    # for now, prints to console. should export as xlsx file.
    for row in rows:
        print('\t'.join(map(str, row)))
def __init__(self, **kwargs):
    """Initialise the instance from the mapping passed as ``data=...``.

    Reads ``contact_id`` (converted to ``int``), ``membership_name`` and
    ``status_id`` from that mapping. The membership name is looked up in
    ``get_departments()``; when it is not found, a message is written to
    stderr and ``self.department`` is set to ``None``. ``self.active`` is
    true exactly when ``status_id`` equals 2.

    Raises:
        ValueError: if no ``data`` keyword argument was supplied.
    """
    if 'data' not in kwargs:
        raise ValueError('no data provided')
    record = kwargs['data']

    self.contact_id = int(record['contact_id'])
    self.name = record['membership_name']

    known_departments = get_departments()
    if self.name not in known_departments:
        sys.stderr.write('Unknown department: ' + self.name + "\n")
        self.department = None
    else:
        self.department = known_departments[self.name]

    self.active = bool(record['status_id'] == 2)
import categories
from database import Database

# NOTE(review): `catalogs` and `departments` are used below but no import
# for them is visible in this part of the file - confirm they are imported.

if __name__ == '__main__':
    with Database('courses.db') as db:
        # Start from an empty raw-course table.
        db.drop_and_create_raw_course_data_table()

        # Parsed results from every query are merged into one set.
        courses = set()

        # Pass 1: every (1..7, 1..14) pair accepted by get_courses_of_week.
        # presumably weekday x period - TODO confirm against catalogs.
        for outer_no in range(1, 8):
            for inner_no in range(1, 15):
                print((outer_no, inner_no))
                courses.update(
                    catalogs.parse(
                        catalogs.get_courses_of_week(outer_no, inner_no)))

        # Pass 2: every department of every college; element 0 of each
        # department entry is what the catalog query takes.
        college_map = departments.get_departments()
        for college_departments in college_map.values():
            for dept_entry in college_departments:
                print(dept_entry)
                courses.update(
                    catalogs.parse(
                        catalogs.get_courses_of_departments(dept_entry[0])))

        # Pass 3: every program of every category, queried the same way.
        category_map = categories.get_categories()
        for category_programs in category_map.values():
            for program_entry in category_programs:
                print(program_entry)
                courses.update(
                    catalogs.parse(
                        catalogs.get_courses_of_programs(program_entry[0])))

        # NOTE(review): nothing visible here writes `courses` to `db` -
        # confirm whether a persistence step follows.
# -*- coding: utf-8 -*-
#
# Pirate Party Switzerland membership management script
# Created by Stefan Thoeni at 2015-12-5
#
# Write the department list to stdout as comma-separated text:
# a 'department,1.0' line, an 'id,parent,name' header line, then one row
# per distinct department number, ordered by number.
#

import sys
import os

from departments import get_departments


def get_number(dep):
    """Return the ``number`` attribute of *dep* (used as the sort key)."""
    return dep.number


deps = get_departments()

sys.stdout.write('department,1.0\n')
sys.stdout.write('id,parent,name\n')

# Department numbers already written. Several entries of `deps` may share
# a number; only the first one in sort order is emitted.
depnums = set()

for dep in sorted(deps.values(), key=get_number):
    if dep.number in depnums:
        continue

    # A department without a parent is written with the literal text "None".
    if dep.parent is None:
        parent_name = "None"
    else:
        parent_name = str(deps[dep.parent].fullname)

    sys.stdout.write(
        ",".join((str(dep.number), parent_name, dep.fullname)) + "\n")
    depnums.add(dep.number)